Example #1
def calibrate(reck_heaters, calib, heater_name, heater_index, input, outputs,
              minV, maxV, N, int_time, counting):

    metadata = {
        'label': 'Fringe Sweep',
        'Counting Device': counting,
        'heater': (heater_index, heater_name),
        'Input': input,
        'Outputs': outputs,
        'Int_time': int_time,
        'MinV': minV,
        'MaxV': maxV
    }

    if counting == 'cc':
        datafilename = take_fringe(reck_heaters, heater_index, minV, maxV, N,
                                   int_time, metadata)
    elif counting == 'pm':
        datafilename = pm_take_fringe(reck_heaters, heater_index, minV, maxV,
                                      N, int_time, metadata)
    else:
        raise ValueError("counting must be 'cc' or 'pm', got %r" % counting)

    data_for_fitting = parse(datafilename, outputs)

    voltages = np.linspace(minV, maxV, N)
    #calib.datafilename = data_for_fitting
    calib.fit(heater_index, voltages, data_for_fitting, graph=True)
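
A minimal usage sketch for calibrate; the heater name, channel numbers, and sweep settings below are hypothetical stand-ins, and reck_heaters/calib are assumed to be the hardware and calibration objects the caller already holds:

# Hypothetical call: sweep heater 'H3' (index 3) from 0 V to 5 V in 50 steps,
# counting coincidences ('cc') with a 1 s integration time per point.
calibrate(reck_heaters, calib, 'H3', 3, input=1, outputs=[2, 3],
          minV=0.0, maxV=5.0, N=50, int_time=1, counting='cc')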
Example #2
def format_data():
    print("Opening file...")
    wb = open_file()
    print("Reading file...")
    wb_data = read_wb(wb)
    print("Formatting data...")
    formatted_data = parse_data.parse(wb_data)
    with open(output_json_file, 'w') as output_file:
        json.dump(formatted_data, output_file)
    print("Complete!")
Example #4
    def run(self):
        while True:
            try:
                data = record()
                parsed_data = parse(data)

                # accelerometer (ax, ay, az) and gyroscope (gx, gy, gz) readings
                ax, ay, az = parsed_data[0:3]
                gx, gy, gz = parsed_data[3:6]

                clazz = classify(ax, ay, az, gx, gy, gz)
                self.perform_action(clazz)
                print('Detected: ' + clazz)
            except Exception as e:
                print("Something went wrong in gestureThread:", e)
Example #5
def collect_all_features(filenames, model_dir="modelsIlya"):
    df = parse(filenames)
    data = pd.DataFrame()
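    # Each dialogue contributes two rows below: one from Alice's point of view and
    # one from Bob's, so the user/opponent columns are the same lists swapped.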
    data["dialogId"] = df["dialogId"].tolist() + df["dialogId"].tolist()
    data["context"] = df["context"].tolist() + df["context"].tolist()
    data["userMessages"] = df["AliceMessages"].tolist() + df["BobMessages"].tolist()
    data["userOpponentMessages"] = df["BobMessages"].tolist() + df["AliceMessages"].tolist()
    data["userMessageMask"] = df["AliceMessageMask"].tolist() + df["BobMessageMask"].tolist()
    separator = "      "
    data["userConcatenatedMessages"] = data["userMessages"].apply(lambda x: separator.join(x))
    data["userOpponentConcatenatedMessages"] = data["userOpponentMessages"].apply(lambda x: separator.join(x))
    data["userIsBot"] = df["AliceIsBot"].tolist() + df["BobIsBot"].tolist()
    data["userScores"] = df["AliceScore"].tolist() + df["BobScore"].tolist()

    hand_crafted_enable = True
    custom_enable = True
    bow_enable = True
    boc_enable = True
    rhand_crafted_enable = False
    if hand_crafted_enable:
        data["isEmpty"] = data["userMessages"].apply(lambda x: len(x) == 0)
        data["isEmptyDialog"] = (data["userOpponentMessages"].apply(lambda x: len(x) == 0)) & \
                                (data["userMessages"].apply(lambda x: len(x) == 0))
        data["messageNum"] = data["userMessages"].apply(lambda x: len(x))
        data["numChars"] = data["userMessages"].apply(lambda x: sum([len(msg) for msg in x]))
        data["numWords"] = data["userMessages"].apply(lambda x: sum([len(msg.split()) for msg in x]))
        data["avgChars"] = data["userMessages"].apply(lambda x: np.mean([0] + [len(msg) for msg in x]))
        data["avgWords"] = data["userMessages"].apply(lambda x: np.mean([0] + [len(msg.split()) for msg in x]))

        if custom_enable:
            with open("../words.txt") as wordfile:
                system_words = set(x.strip().lower() for x in wordfile.readlines())
            masks = data["userMessageMask"].tolist()
            data["msgInARow"] = [max([0] + [len(list(x)) for x in (g for k, g in itertools.groupby(mask) if k == 1)])
                                 for mask in masks]
            not_dict_word_count = [sum([1 for word in text_to_wordlist(msg) if word not in system_words])
                                   for msg in data["userConcatenatedMessages"].tolist()]
            len_msg = [len(text_to_wordlist(msg, remove_stopwords=False)) for msg in data["userConcatenatedMessages"].tolist()]
            data["typoCount"] = not_dict_word_count
            data["typoCountPart"] = [float(count) / (1 + len_msg[i]) for i, count in enumerate(not_dict_word_count)]
            context_word_count = [sum([1 for word in text_to_wordlist(text) if word in data["context"].tolist()[i]])
                                  for i, text in enumerate(data["userConcatenatedMessages"].tolist())]
            data["relevantWords"] = context_word_count
            data["relevantWordsPart"] = [float(count) / (1 + len_msg[i]) for i, count in enumerate(context_word_count)]
            data["groupOf1"] = [sum([len(list(x)) == 1 for x in (g for k, g in itertools.groupby(mask) if k == 1)])
                                for mask in masks]
            data["groupOfNot1"] = [sum([len(list(x)) != 1 for x in (g for k, g in itertools.groupby(mask) if k == 1)])
                                   for mask in masks]
            stopwords = set(stop_words.get_stop_words("english"))
            data["stopWordsCount"] = [sum([1 for word in text_to_wordlist(msg, remove_stopwords=False)
                                           if word in stopwords]) for msg in data["userConcatenatedMessages"].tolist()]
            data["notStopWordsCount"] = [sum([1 for word in text_to_wordlist(msg, remove_stopwords=False)
                                              if word not in stopwords]) for msg in data["userConcatenatedMessages"].tolist()]

    if rhand_crafted_enable:
        data["RmessageNum"] = data["userOpponentMessages"].apply(lambda x: len(x))
        data["RnumChars"] = data["userOpponentMessages"].apply(lambda x: sum([len(msg) for msg in x]))
        data["RnumWords"] = data["userOpponentMessages"].apply(lambda x: sum([len(msg.split()) for msg in x]))
        data["RavgChars"] = data["userOpponentMessages"].apply(lambda x: np.mean([0] + [len(msg) for msg in x]))
        data["RavgWords"] = data["userOpponentMessages"].apply(lambda x: np.mean([0] + [len(msg.split()) for msg in x]))

    if bow_enable:
        print("BoW step...")
        dump_filename = os.path.join(model_dir, "bow_vectorizer.pickle")
        vectorizer = None
        if os.path.exists(dump_filename):
            with open(dump_filename, "rb") as f:
                vectorizer = pickle.load(f)
        bow_train_data, _, vectorizer = bow(data["userConcatenatedMessages"].tolist(), [],
                                            tokenizer=text_to_wordlist, bow_ngrams=(1, 2), vectorizer=vectorizer)
        data = pd.concat([data, pd.DataFrame(bow_train_data)], axis=1)
        if not os.path.exists(dump_filename):
            with open(dump_filename, "wb") as f:
                pickle.dump(vectorizer, f)

    if boc_enable:
        print("BoC step...")
        dump_filename = os.path.join(model_dir, "boc_vectorizer.pickle")
        vectorizer = None
        if os.path.exists(dump_filename):
            with open(dump_filename, "rb") as f:
                vectorizer = pickle.load(f)
        bow_train_data, _, vectorizer = bow(data["userConcatenatedMessages"].tolist(), [],
                                            tokenizer=text_to_charlist, bow_ngrams=(1, 3), vectorizer=vectorizer)
        data = pd.concat([data, pd.DataFrame(bow_train_data)], axis=1)
        if not os.path.exists(dump_filename):
            with open(dump_filename, "wb") as f:
                pickle.dump(vectorizer, f)
    return data
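
A hedged usage sketch for collect_all_features; the input file name is an assumption about the surrounding pipeline, while the inspected columns are ones built above:

# Hypothetical: build the per-user feature table from one dialogue dump and
# peek at a few of the hand-crafted columns computed above.
features = collect_all_features(["train_dialogs.json"], model_dir="modelsIlya")
print(features[["dialogId", "messageNum", "numWords", "typoCountPart"]].head())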
Example #6
# Put this script's process id into url_get_py_id, defined in url_manager.py
url_get_py_id.put(py_id)

# Pull urls from the new_url queue defined in url_manager.py
stop_flag = 0  # stop this url_get.py after repeatedly failing to get a url to crawl
while True:
    try:
        url = new_url.get(timeout=1)  # take one url from the queue of urls waiting to be crawled

        # If more work is queued and memory allows, spawn another worker process.
        if not new_url.empty() and psutil.virtual_memory().percent < settings.MAX_MEMORY_USE:
            p = multiprocessing.Process(target=run_url_get_py)
            p.start()  # start the new worker process

        try:
            data = parse_data.parse(url)  # crawling/parsing logic
            save_to_db.save(data)
            print('crawl succeeded', url)
        except Exception:
            print('!!!! crawl failed', url)
            new_url.put(url)

    except Exception:
        print('{} has no more urls to crawl'.format(str(py_id)))
        stop_flag += 1
        time.sleep(5)

    finally:
        if stop_flag >= 10:
            break
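
The loop above assumes url_manager.py exposes process-shared queues named new_url and url_get_py_id; a minimal sketch of what that module might contain (the Manager-based queues are an assumption made for illustration):

# url_manager.py (hypothetical sketch)
import multiprocessing

_manager = multiprocessing.Manager()
new_url = _manager.Queue()        # urls waiting to be crawled
url_get_py_id = _manager.Queue()  # process ids of running url_get.py workers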