def calibrate(reck_heaters, calib, heater_name, heater_index, input, outputs, minV, maxV, N, int_time, counting):
    """Sweep one heater's voltage, record an interference fringe, and fit it.

    Parameters
    ----------
    reck_heaters : heater-control object handed through to the sweep routines.
    calib : calibration object; its ``fit`` method consumes the parsed sweep.
    heater_name, heater_index : human label and index of the heater swept.
    input : input port label (stored in metadata only).  NOTE(review): shadows
        the ``input`` builtin — name kept for caller compatibility.
    outputs : output ports whose data ``parse`` extracts for fitting.
    minV, maxV, N : sweep voltage range and number of points.
    int_time : integration time per sweep point.
    counting : acquisition backend — 'cc' (coincidence counting) or
        'pm' (power meter).

    Raises
    ------
    ValueError
        If ``counting`` is neither 'cc' nor 'pm'.  (Previously an unknown
        device fell through both branches and crashed later with a
        NameError on ``datafilename``.)
    """
    metadata = {
        'label': 'Fringe Sweep',
        'Counting Device': counting,
        'heater': (heater_index, heater_name),
        'Input': input,
        'Outputs': outputs,
        'Int_time': int_time,
        'MinV': minV,
        'MaxV': maxV,
    }
    # Dispatch to the acquisition backend matching the counting device.
    if counting == 'cc':
        datafilename = take_fringe(reck_heaters, heater_index, minV, maxV, N, int_time, metadata)
    elif counting == 'pm':
        datafilename = pm_take_fringe(reck_heaters, heater_index, minV, maxV, N, int_time, metadata)
    else:
        raise ValueError("Unknown counting device: {!r} (expected 'cc' or 'pm')".format(counting))
    data_for_fitting = parse(datafilename, outputs)
    voltages = np.linspace(minV, maxV, N)
    calib.fit(heater_index, voltages, data_for_fitting, graph=True)
def format_data():
    """Open the source workbook, reformat its contents, and dump them as JSON.

    Pipeline: ``open_file()`` -> ``read_wb()`` -> ``parse_data.parse()`` ->
    JSON written to the module-level ``output_json_file`` path.
    Returns nothing; progress is reported on stdout.
    """
    # Parenthesized single-argument print is identical on Python 2 and valid
    # on Python 3 (the original used Python-2-only print statements).
    print("Opening file...")
    wb = open_file()
    print("Reading file...")
    wb_data = read_wb(wb)
    print("Formatting data...")
    formatted_data = parse_data.parse(wb_data)
    with open(output_json_file, 'w') as output_file:
        json.dump(formatted_data, output_file)
    print("Complete!")
def calibrate(reck_heaters, calib, heater_name, heater_index, input, outputs, minV, maxV, N, int_time, counting):
    """Run a fringe sweep on one heater and fit the resulting calibration curve.

    ``counting`` selects the acquisition backend: 'cc' uses
    ``take_fringe`` (coincidence counting), 'pm' uses ``pm_take_fringe``
    (power meter).  The recorded file is parsed for ``outputs`` and passed,
    with the voltage axis, to ``calib.fit``.

    NOTE(review): the ``input`` parameter shadows the builtin of the same
    name; retained so keyword callers keep working.

    Raises:
        ValueError: if ``counting`` is not 'cc' or 'pm'.  The original
            silently skipped both branches and later raised NameError on
            the unbound ``datafilename``.
    """
    metadata = {'label': 'Fringe Sweep',
                'Counting Device': counting,
                'heater': (heater_index, heater_name),
                'Input': input,
                'Outputs': outputs,
                'Int_time': int_time,
                'MinV': minV,
                'MaxV': maxV}
    if counting == 'cc':
        datafilename = take_fringe(reck_heaters, heater_index, minV, maxV, N, int_time, metadata)
    elif counting == 'pm':
        datafilename = pm_take_fringe(reck_heaters, heater_index, minV, maxV, N, int_time, metadata)
    else:
        # Fail fast with a clear message instead of a confusing NameError.
        raise ValueError("Unknown counting device: {!r} (expected 'cc' or 'pm')".format(counting))
    data_for_fitting = parse(datafilename, outputs)
    voltages = np.linspace(minV, maxV, N)
    calib.fit(heater_index, voltages, data_for_fitting, graph=True)
def run(self):
    """Gesture-thread main loop: read sensors, classify, act.

    Runs forever.  Any failure in one iteration is reported and the loop
    continues — the original's deliberate best-effort behavior is kept,
    though the broad ``except Exception`` still hides bugs; it is at least
    a thread boundary, where swallowing-and-logging is conventional.
    """
    while True:  # 'while 1' -> idiomatic 'while True' (same semantics)
        try:
            data = record()
            parsed_data = parse(data)
            # parse() yields accelerometer (ax, ay, az) then gyro (gx, gy, gz)
            # components in fixed positions 0..5 — TODO confirm against parse().
            ax = parsed_data[0]
            ay = parsed_data[1]
            az = parsed_data[2]
            gx = parsed_data[3]
            gy = parsed_data[4]
            gz = parsed_data[5]
            clazz = classify(ax, ay, az, gx, gy, gz)
            self.perform_action(clazz)
            # print-function form; identical output to the Py2 statement.
            print('Detected: ' + clazz)
        except Exception as e:
            # Trailing space in the literal + default sep reproduces the old
            # Python-2 comma-form output exactly.
            print("Something went wrong in gestureThread: ", e)
def collect_all_features(filenames, model_dir="modelsIlya"):
    """Build the per-user feature DataFrame for bot-detection from dialog logs.

    Each dialog contributes TWO rows — one from Alice's perspective, one from
    Bob's — hence every column below is a concatenation of the Alice view
    followed by the Bob view (the dialogId/context lists are simply doubled
    to stay aligned).

    Feature groups are gated by the hard-coded flags set below:
      * hand_crafted: message/char/word counts and averages,
      * custom: typo, context-relevance, burst-grouping and stop-word stats
        (needs ``../words.txt`` as a dictionary of known words),
      * bow / boc: bag-of-words / bag-of-chars vectors whose fitted
        vectorizers are cached as pickles under ``model_dir``,
      * rhand_crafted: same hand-crafted stats for the opponent (disabled).

    Returns the assembled ``pandas.DataFrame``.

    NOTE(review): relies on module-level helpers ``parse``, ``bow``,
    ``text_to_wordlist``, ``text_to_charlist`` and ``stop_words`` — their
    exact contracts are not visible from this file.
    """
    df = parse(filenames)
    data = pd.DataFrame()
    # Duplicate dialog-level columns so they align with the two
    # per-user rows (Alice's block first, then Bob's).
    data["dialogId"] = df["dialogId"].tolist() + df["dialogId"].tolist()
    data["context"] = df["context"].tolist() + df["context"].tolist()
    # "user*" = the row's own user; "userOpponent*" = the other participant.
    data["userMessages"] = df["AliceMessages"].tolist() + df["BobMessages"].tolist()
    data["userOpponentMessages"] = df["BobMessages"].tolist() + df["AliceMessages"].tolist()
    data["userMessageMask"] = df["AliceMessageMask"].tolist() + df["BobMessageMask"].tolist()
    separator = " "
    data["userConcatenatedMessages"] = data["userMessages"].apply(lambda x: separator.join(x))
    data["userOpponentConcatenatedMessages"] = data["userOpponentMessages"].apply(lambda x: separator.join(x))
    data["userIsBot"] = df["AliceIsBot"].tolist() + df["BobIsBot"].tolist()
    data["userScores"] = df["AliceScore"].tolist() + df["BobScore"].tolist()
    # Feature-group switches (hard-coded; flip here to ablate a group).
    hand_crafted_enable = True
    custom_enable = True
    bow_enable = True
    boc_enable = True
    rhand_crafted_enable = False
    if hand_crafted_enable:
        # Basic volume statistics over the user's own messages.
        data["isEmpty"] = data["userMessages"].apply(lambda x: len(x) == 0)
        data["isEmptyDialog"] = (data["userOpponentMessages"].apply(lambda x: len(x) == 0)) & \
                                (data["userMessages"].apply(lambda x: len(x) == 0))
        data["messageNum"] = data["userMessages"].apply(lambda x: len(x))
        data["numChars"] = data["userMessages"].apply(lambda x: sum([len(msg) for msg in x]))
        data["numWords"] = data["userMessages"].apply(lambda x: sum([len(msg.split()) for msg in x]))
        # The prepended 0 keeps np.mean defined (and pulls the average down)
        # for users with no messages.
        data["avgChars"] = data["userMessages"].apply(lambda x: np.mean([0] + [len(msg) for msg in x]))
        data["avgWords"] = data["userMessages"].apply(lambda x: np.mean([0] + [len(msg.split()) for msg in x]))
    if custom_enable:
        # Dictionary of known words; anything outside it counts as a "typo".
        with open("../words.txt") as wordfile:
            system_words = set(x.strip().lower() for x in wordfile.readlines())
        masks = data["userMessageMask"].tolist()
        # Longest run of consecutive messages by this user (mask value 1).
        data["msgInARow"] = [max([0] + [len(list(x)) for x in (g for k, g in itertools.groupby(mask) if k == 1)]) for mask in masks]
        not_dict_word_count = [sum([1 for word in text_to_wordlist(msg) if word not in system_words]) for msg in data["userConcatenatedMessages"].tolist()]
        len_msg = [len(text_to_wordlist(msg, remove_stopwords=False)) for msg in data["userConcatenatedMessages"].tolist()]
        data["typoCount"] = not_dict_word_count
        # +1 guards against division by zero for empty messages.
        data["typoCountPart"] = [float(count) / (1 + len_msg[i]) for i, count in enumerate(not_dict_word_count)]
        # NOTE(review): ``word in data["context"].tolist()[i]`` looks like a
        # SUBSTRING test against the context string (and re-materializes the
        # list every iteration) — confirm this is intended.
        context_word_count = [sum([1 for word in text_to_wordlist(text) if word in data["context"].tolist()[i]]) for i, text in enumerate(data["userConcatenatedMessages"].tolist())]
        data["relevantWords"] = context_word_count
        data["relevantWordsPart"] = [float(count) / (1 + len_msg[i]) for i, count in enumerate(context_word_count)]
        # How many message bursts were exactly one message long vs. longer.
        data["groupOf1"] = [sum([len(list(x)) == 1 for x in (g for k, g in itertools.groupby(mask) if k == 1)]) for mask in masks]
        data["groupOfNot1"] = [sum([len(list(x)) != 1 for x in (g for k, g in itertools.groupby(mask) if k == 1)]) for mask in masks]
        stopwords = set(stop_words.get_stop_words("english"))
        data["stopWordsCount"] = [sum([1 for word in text_to_wordlist(msg, remove_stopwords=False) if word in stopwords]) for msg in data["userConcatenatedMessages"].tolist()]
        data["notStopWordsCount"] = [sum([1 for word in text_to_wordlist(msg, remove_stopwords=False) if word not in stopwords]) for msg in data["userConcatenatedMessages"].tolist()]
    if rhand_crafted_enable:
        # Mirror of the hand-crafted stats for the opponent (currently off).
        data["RmessageNum"] = data["userOpponentMessages"].apply(lambda x: len(x))
        data["RnumChars"] = data["userOpponentMessages"].apply(lambda x: sum([len(msg) for msg in x]))
        data["RnumWords"] = data["userOpponentMessages"].apply(lambda x: sum([len(msg.split()) for msg in x]))
        data["RavgChars"] = data["userOpponentMessages"].apply(lambda x: np.mean([0] + [len(msg) for msg in x]))
        data["RavgWords"] = data["userOpponentMessages"].apply(lambda x: np.mean([0] + [len(msg.split()) for msg in x]))
    if bow_enable:
        print("BoW step...")
        # Reuse a previously fitted vectorizer if one was pickled; otherwise
        # bow() fits a new one, which we persist below for later runs.
        dump_filename = os.path.join(model_dir, "bow_vectorizer.pickle")
        vectorizer = None
        if os.path.exists(dump_filename):
            # NOTE(review): pickle.load on a file from disk — trusted input only.
            with open(dump_filename, "rb") as f:
                vectorizer = pickle.load(f)
        bow_train_data, _, vectorizer = bow(data["userConcatenatedMessages"].tolist(), [], tokenizer=text_to_wordlist, bow_ngrams=(1, 2), vectorizer=vectorizer)
        data = pd.concat([data, pd.DataFrame(bow_train_data)], axis=1)
        if not os.path.exists(dump_filename):
            with open(dump_filename, "wb") as f:
                pickle.dump(vectorizer, f)
    if boc_enable:
        print("BoC step...")
        # Same caching scheme as BoW, but character n-grams (1..3).
        dump_filename = os.path.join(model_dir, "boc_vectorizer.pickle")
        vectorizer = None
        if os.path.exists(dump_filename):
            with open(dump_filename, "rb") as f:
                vectorizer = pickle.load(f)
        bow_train_data, _, vectorizer = bow(data["userConcatenatedMessages"].tolist(), [], tokenizer=text_to_charlist, bow_ngrams=(1, 3), vectorizer=vectorizer)
        data = pd.concat([data, pd.DataFrame(bow_train_data)], axis=1)
        if not os.path.exists(dump_filename):
            with open(dump_filename, "wb") as f:
                pickle.dump(vectorizer, f)
    return data
# 将该py的进程号放到 url_manager.py 下的 url_get_py_id 中 url_get_py_id.put(py_id) # 从 url_manager.py 下的 new_url 队列取 url stop_flag = 0 # 多次没取到待爬取的url,就停掉该 url_get.py while True: try: url = new_url.get(timeout=1) # 从待爬取的url队列中取出一个url if not new_url.empty( ) and psutil.virtual_memory().percent < settings.MAX_MEMORY_USE: p = multiprocessing.Process(target=run_url_get_py) p.start() # 用start()方法启动 try: data = parse_data.parse(url) # 爬取逻辑 save_to_db.save(data) print('爬取成功', url) except: print('!!!!爬取失败', url) new_url.put(url) except: print('{} 没有待爬取的url了'.format(str(py_id))) stop_flag += 1 time.sleep(5) finally: if stop_flag >= 10: break