def clean_directory():
    """
    Clean raw_data directory
    :return: none
    """
    raw_data_dir = os.path.join(os.getcwd(), 'raw_data')
    for each in os.listdir(raw_data_dir):
        # os.listdir returns bare filenames, so join with the directory
        # before removing.
        os.remove(os.path.join(raw_data_dir, each))
    retrieve()
def getDrops():
    lng = request.query.lng
    lat = request.query.lat
    user_id = request.query.user_id  # last update
    dropslist = retrieve.retrieve(lng, lat, user_id)
    return str(dropslist)
def main():
    subjects = sorted(payload.subject)
    session = requests.Session()
    while True:
        for i in range(len(subjects)):
            print("{:2d}.{}".format(i + 1, subjects[i]))
        try:
            choice = input("\nYour choice: ")
            subject = subjects[int(choice) - 1]
        except Exception:
            print("Please try again.\n")
            continue
        print("You have selected: " + subject)
        break
    print("Select the time range of the exam papers")
    print("Enter ctrl+C to exit")
    while True:
        try:
            from_date = int(input("From (Example: 201306): "))
            to_date = int(input("To (Example: 202001): "))
        except Exception:
            print("Please try again.\n")
            continue
        print("You have selected the time range {} to {}.\n".format(from_date, to_date))
        if len(str(from_date)) == 6 and len(str(to_date)) == 6 and from_date <= to_date:
            break
        else:
            print("You have entered an invalid time range, please try again.")
    print("You can enter an optional keyword")
    keyword = input("Your keyword (N for none), default is none: ")
    if keyword == "N" or keyword == "":
        keyword = ""
        print("Your keyword is none")
    else:
        keyword = keyword.lower()
        print("Your keyword is: " + keyword)
    want_login = input("Some files require login to be downloaded, would you like to login? y/N: ").lower()
    if want_login == "y" or want_login == "yes":
        username = input("Username: ")
        # The credential prompts were redacted in the original source;
        # reconstructed here as plain input() calls.
        password = input("Password: ")
        l = login.Login(session, username, password)
        session = l.start()
        download.download(session, subject,
                          retrieve.retrieve(subject, keyword, from_date, to_date),
                          login=True)
        return 0
    else:
        download.download(session, subject,
                          retrieve.retrieve(subject, keyword, from_date, to_date),
                          login=False)
        return 0
def get_data(filename, data, embeddings, w2i, gensim_model, args):
    """
    Retrieves all data. Load it from a pickle file if it exists,
    and create it otherwise.
    """
    global num_words

    if os.path.exists(filename):
        all_examples = data_utils.load_pickle(filename)
    else:
        all_examples = []

        for example in tqdm(data[:10]):
            resources = []
            embedded_resources = []

            data_utils.get_resources(example["documents"]["comments"],
                                     resources, embedded_resources)
            data_utils.get_resources(example["documents"]["fact_table"],
                                     resources, embedded_resources)
            data_utils.get_resources(example["documents"]["plot"],
                                     resources, embedded_resources)
            data_utils.get_resources(example["documents"]["review"],
                                     resources, embedded_resources)

            chat = example["chat"]

            # Loop over each of the last three utterances in the chat (context).
            for i in range(3, len(chat) - 1):
                last_utterances = chat[i - 3:i]
                response = chat[i + 1]

                if len(response) > 0:
                    exp = []
                    embedded_utterances = [
                        data_utils.embed_sentence(utterance)
                        for utterance in last_utterances
                    ]
                    context, embedded_context = \
                        data_utils.get_context(last_utterances)

                    # Retrieve: takes context and resources. Uses Word Mover's
                    # Distance to obtain relevant resource candidates.
                    similarities = retrieve(context, resources, gensim_model)
                    padd_resource = embedded_resources[np.argmax(
                        similarities)][-args.max_length:]
                    padd_resource = np.pad(
                        padd_resource,
                        ((0, args.max_length - len(padd_resource)), (0, 0)),
                        "constant", constant_values=(num_words))

                    exp.append(padd_resource)
                    exp.append(data_utils.clean_sentence(response))
                    all_examples.append(tuple(exp))

        save_data(filename, all_examples)
    return all_examples
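# The retrieve() called above is not shown in this section. A minimal sketch
# of a Word Mover's Distance ranker, assuming context and each resource are
# plain strings and gensim_model is a gensim KeyedVectors instance
# (wmdistance() is real gensim API; the rest is a guess at the missing
# function, not its actual implementation):
def retrieve(context, resources, gensim_model):
    """Score each resource against the context; higher score = more relevant."""
    context_tokens = context.split()
    similarities = []
    for resource in resources:
        # wmdistance() returns a distance, so negate it: np.argmax() over the
        # result then picks the closest resource, matching the callers above.
        distance = gensim_model.wmdistance(context_tokens, resource.split())
        similarities.append(-distance)
    return similarities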
def info(customer_name):
    info = retrieve(customer_name)
    print('info', info)
    print('first name', info[0])
    print('last name', info[1])
    print('phone number', info[2])
    # print('points', info[3])
    return render_template('info.html', info_list=info)
def getSensorData(latitude, longitude):
    # r = retrieve(float(sys.argv[1]), float(sys.argv[2]))
    r = retrieve(latitude, longitude)
    [temp, tDist, tTime] = r.getLatestValidTemperature()
    [level, sDist, sTime] = r.getLatestValidGulleySiltLevel()
    [distance] = r.getNearestPothole()
    return json.dumps({"temperature(C)": temp,
                       "thermometerDistance(km)": tDist,
                       "thermometerTime": tTime,
                       "siltLevel(%)": level,
                       "siltSensorDistance(km)": sDist,
                       "siltSensorTime": sTime,
                       "nearestPotholeDistance(km)": distance},
                      indent=4, separators=(',', ': '))
def btn_clicked_extractfeat_shot(self):
    # Number of images in the database directory.
    self.imgnum = len(os.listdir(self.filebasepath))
    ox = retrieve()
    print('marker')
    ImgData1 = ox.load_image(self.filebasepath, self.imgnum)
    net = ox.load_vgg16model()
    self.featall = ox.extract_vgg16feat(ImgData1)
    np.savetxt(os.path.split(self.filebasepath)[0] + '/featall.txt',
               self.featall)
    _translate = QtCore.QCoreApplication.translate
    self.lineEdit_2.setText(_translate("MainWindow",
                                       'Feature extraction finished.....'))
def __search(self):
    self.scr.delete(1.0, END)
    keywords = self.keywordsvar.get()
    dirpath = self.pathvar.get()
    if not os.path.isdir(dirpath):
        messagebox.showinfo(
            "Hint",
            dirpath + " is not a directory, please choose a directory.")
        return
    result = retrieve(dirpath, keywords)
    if result:
        self.scr.insert("insert", result)
    else:
        messagebox.showinfo("result", "Nothing found.")
def segment(self, data, XMLin=False, XMLout=False):
    if XMLin:
        match = re.search('<term>(.*?)</term>', data)
        if match:
            data = match.group(1)
            # Strip any remaining inline tags. The original pattern '(.*?)'
            # only matched empty strings; '<.*?>' is the presumed intent.
            data = re.sub('<.*?>', '', data)
            data = data.strip()
        else:
            return 'Error: invalid format.'
    result = retrieve(data, self.database, self.answer)
    if not result:
        # Not matched in the database; fall back to the CRF segmenter.
        result = callCRF(data)
    if XMLout:
        result = '<answer org="THU">' + result + '</answer>'
    return result
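# The database lookup used by segment() above is not shown. A minimal sketch,
# assuming database and answer are parallel lists mapping known inputs to
# stored results (the call-site names are real; the data layout is a guess):
def retrieve(data, database, answer):
    # Return the cached answer on an exact match, else None so the caller
    # falls back to callCRF().
    if data in database:
        return answer[database.index(data)]
    return None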
def btn_clicked_selectquery_shot(self):
    self.QueryImgPath = QFileDialog.getOpenFileName(
        None, "Select a query file", ".",
        "Image Files(*.jpg *.jpeg *.png)")[0]
    # Sorted list of image file names in the database directory.
    self.ImgName = sorted(os.listdir(self.filebasepath))
    # File name of the query image.
    self.QueryImg = os.path.split(self.QueryImgPath)[1]
    # Load the stored features of the image database.
    self.featall = np.loadtxt(os.path.split(self.filebasepath)[0] + '/featall.txt',
                              dtype=np.float32)
    # Extract deep features for the query image.
    ox = retrieve()
    ImgData2 = ox.load_image(self.QueryImgPath, 1)  # read the query image
    net = ox.load_vgg16model()
    self.feat = ox.extract_vgg16feat(ImgData2)
    print('Query image feature extraction finished', self.feat.shape)
    self.image = QImage(self.QueryImgPath)
    self.label_5.setPixmap(QPixmap.fromImage(self.image))
    self.label_5.setScaledContents(True)
if __name__ == "__main__":
    # The crawler's main entry point.
    socket.setdefaulttimeout(retrieve.search_timeout)
    q = urlqueue.URLQueue()
    loader = downloader.Downloader()
    file_mgr = filemgr.FileManager()
    opt_n, opt_terms = opts.obtain_opts()
    current_n = opt_n
    file_mgr.init(opt_n)
    logging.info("start!")

    # Retrieve seed results from Google.
    root_urls = retrieve.retrieve(opt_terms)
    q.add(root_urls, FLOAT_MAX)

    while q.size() > 0 and file_mgr.check():
        url_ls = []
        for i in range(THREAD_COUNT):
            if q.size() > 0:
                url_ls.append(q.pop())
        for tmp in url_ls:
            if file_mgr.check():
                # 'xtarget' in the original is assumed to be a typo for 'target'.
                loader.start(target=crawl,
                             kwargs={"url": tmp['url'],
                                     "link_score": tmp['score'],
                                     "search_terms": opt_terms})
        loader.join()
        # tmp = q.pop()
        # crawl(tmp['url'], tmp['score'], opt_terms)
        q.sort()
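# urlqueue.URLQueue is not shown in this section. A minimal sketch of the
# interface the crawler relies on (add/pop/size/sort, highest score popped
# first, entries as dicts with 'url' and 'score' keys as in the loop above;
# the heap-based implementation is an assumption):
import heapq

class URLQueue:
    def __init__(self):
        self._heap = []  # max-queue behavior via negated scores

    def add(self, urls, score):
        for url in urls:
            heapq.heappush(self._heap, (-score, url))

    def pop(self):
        neg_score, url = heapq.heappop(self._heap)
        return {'url': url, 'score': -neg_score}

    def size(self):
        return len(self._heap)

    def sort(self):
        # A heap is already ordered for pop(); kept for interface parity.
        heapq.heapify(self._heap)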
def pull(s, url=None):
    if url is None:
        url = s.item
    r = retrieve(url=url)
    s.data = r.data
def retrieving(self, prox, ir):
    """
    Calls the retrieve function in the retrieval module to find
    wheel speeds for retrieving the food source.
    """
    retrieve.retrieve(ir, self.IR_THRESHOLD)
    self.set_wheel_speeds(retrieve.get_left_wheel_speed(),
                          retrieve.get_right_wheel_speed())
    self.do_timed_action(1.0)
def btn_retrieve_shot(self):
    ox = retrieve()
    print(self.QueryImg)
    ranklist = ox.visual_result(self.filebasepath, self.QueryImg,
                                self.ImgName, self.feat, self.featall)
    print(ranklist[:10])
    # Show the retrieved images in the result grid. The grid labels are not
    # laid out in rank order, so map each label to its rank index explicitly.
    result_labels = [
        self.label_7, self.label_8, self.label_9, self.label_10,
        self.label_11, self.label_12, self.label_13, self.label_14,
        self.label_15, self.label_16, self.label_17, self.label_18,
        self.label_19, self.label_20, self.label_21,
    ]
    rank_indices = [0, 1, 2, 5, 6, 7, 12, 11, 10, 8, 13, 3, 4, 9, 14]
    for label, idx in zip(result_labels, rank_indices):
        self.image = QImage(self.filebasepath + '/' + ranklist[idx])
        label.setPixmap(QPixmap.fromImage(self.image))
        label.setScaledContents(True)
from common import *
import sys

from retrieve import retrieve
from extract import extract_resource
from combine import combine_resource
from cleanup import cleanup_resource
from validate import validate_resource

if __name__ == '__main__':
    DEBUG_RESOURCE = sys.argv[1]
    engine = db_connect()
    source_table = sl.get_table(engine, 'source')

    # Run each pipeline stage in turn, re-reading the row between stages so
    # every stage sees the previous stage's updates.
    row = sl.find_one(engine, source_table, resource_id=DEBUG_RESOURCE)
    retrieve(row, engine, source_table, force=True)
    row = sl.find_one(engine, source_table, resource_id=DEBUG_RESOURCE)
    extract_resource(engine, source_table, row, force=True)
    row = sl.find_one(engine, source_table, resource_id=DEBUG_RESOURCE)
    combine_resource(engine, source_table, row, force=True)
    row = sl.find_one(engine, source_table, resource_id=DEBUG_RESOURCE)
    cleanup_resource(engine, source_table, row, force=True)
    row = sl.find_one(engine, source_table, resource_id=DEBUG_RESOURCE)
    validate_resource(engine, source_table, row, force=True)
def retrieve_one_seq(dataobj, seq_name, model, output_dir, logger=None):
    vdidx = dataobj.sequence_to_sample_idx[seq_name]
    output_dir = os.path.join(output_dir, seq_name)
    os.makedirs(output_dir, exist_ok=True)
    with torch.no_grad():
        # The original passed self.logger, but this is a module-level function
        # with no self; a logger parameter is assumed instead.
        retrieve(dataobj, vdidx, model, output_dir, logger=logger)
def run(data, word2vec):
    """ Retrieve, rerank, rewrite. """
    global device

    emb_size = len(data_utils.embeddings[0])
    SOS_token = torch.Tensor([i for i in range(emb_size)]).unsqueeze(0).to(device)
    EOS_token = torch.Tensor([i + 1 for i in range(emb_size)]).unsqueeze(0).to(device)

    w2emb = data_utils.load_w2emb(args.w2emb)
    w2emb["SOS_token"] = SOS_token.cpu()
    w2emb["EOS_token"] = EOS_token.cpu()

    templates = data_utils.load_templates(args.templates)
    templates = [[temp[-args.max_length:] for temp in part_templ]
                 for part_templ in templates]
    templates = [[
        np.pad(temp2, ((0, args.max_length - len(temp2)), (0, 0)), "constant",
               constant_values=(len(data_utils.w2i))) for temp2 in temp1
    ] for temp1 in templates]
    templates = [torch.Tensor(class_tm) for class_tm in templates]

    rewrite = Rewrite(args.saliency_model, args.rewrite_model,
                      data_utils.embeddings, data_utils.w2i, SOS_token,
                      EOS_token, templates, w2emb, device)
    prediction = ResourcePrediction(args.prediction_model_folder)

    rouge = Rouge()
    total = 0
    avg_rouge1 = 0
    avg_rouge2 = 0
    avg_rougeL = 0
    avg_bleu = 0
    smooth = SmoothingFunction()

    for example in tqdm(data):
        resources = []
        embedded_resources = []
        class_indices = []

        data_utils.get_resources(example["documents"]["comments"], resources,
                                 embedded_resources)
        num_comments = len(resources)
        data_utils.get_resources(example["documents"]["fact_table"], resources,
                                 embedded_resources)
        num_facts = len(resources) - num_comments
        data_utils.get_resources(example["documents"]["plot"], resources,
                                 embedded_resources)
        num_plots = len(resources) - num_comments - num_facts
        data_utils.get_resources(example["documents"]["review"], resources,
                                 embedded_resources)
        num_reviews = len(resources) - num_comments - num_facts - num_plots

        # Keep track of where each resource originated from.
        class_indices += [2] * num_comments
        class_indices += [3] * num_facts
        class_indices += [0] * num_plots
        class_indices += [1] * num_reviews

        chat = example["chat"]

        # Loop over each of the last three utterances in the chat (the context).
        for i in range(3, len(chat) - 1):
            last_utterances = chat[i - 3:i]
            response = chat[i + 1]

            if len(response) > 0:
                embedded_utterances = [
                    data_utils.embed_sentence(utterance)
                    for utterance in last_utterances
                ]
                context, embedded_context = data_utils.get_context(
                    last_utterances)

                # Retrieve: takes context and resources. Uses Word Mover's
                # Distance to obtain relevant resource candidates.
                similarities = retrieve(context, resources, word2vec)

                # Predict: takes the last utterance and predicts the category
                # of the resource. Truncate to the maximum length and pad if
                # it is too short.
                if args.use_gensim:
                    constant_values = len(data_utils.embeddings.index2word)
                else:
                    constant_values = len(data_utils.w2i)

                last_utterance = embedded_utterances[-2]
                padded_utterance = last_utterance[-args.max_length:]
                padded_utterance = np.pad(
                    padded_utterance,
                    ((0, args.max_length - len(padded_utterance)), (0, 0)),
                    "constant", constant_values=(constant_values))

                if args.prediction:
                    predicted = prediction.predict(
                        np.expand_dims(padded_utterance, 0))
                else:
                    predicted = np.array([[0.25, 0.25, 0.25, 0.25]])

                # Rerank resources: takes ranked resource candidates and the
                # class prediction and reranks them.
                ranked_resources, ranked_classes = rerank(
                    embedded_resources, class_indices, similarities, predicted)

                # Rerank templates: takes the best resource and ranks the
                # templates accordingly. Returns the best template.
                best_resource, best_template = rewrite.rerank(
                    ranked_resources[0], ranked_classes[0])

                # Rewrite: takes the best resource and best template and
                # rewrites them into a single response.
                best_response = rewrite.rewrite(best_resource, best_template)

                total += 1
                rouge_scores = rouge.get_scores(best_response, response)[0]
                avg_rouge1 += rouge_scores["rouge-1"]["f"]
                avg_rouge2 += rouge_scores["rouge-2"]["f"]
                avg_rougeL += rouge_scores["rouge-l"]["f"]
                avg_bleu += sentence_bleu([response], best_response,
                                          smoothing_function=smooth.method1)

    print("Average rouge1: " + str(avg_rouge1 / total))
    print("Average rouge2: " + str(avg_rouge2 / total))
    print("Average rougel: " + str(avg_rougeL / total))
    print("Average bleu: " + str(avg_bleu / total))
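# The rerank() called in run() above is not shown. A minimal sketch, assuming
# it weights each resource's retrieval similarity by the predicted probability
# of that resource's class and sorts best-first (the signature is taken from
# the call site; the scoring rule is a guess):
import numpy as np

def rerank(embedded_resources, class_indices, similarities, predicted):
    # predicted has shape (1, num_classes), as in the np.array fallback above.
    scores = [sim * predicted[0][cls]
              for sim, cls in zip(similarities, class_indices)]
    order = np.argsort(scores)[::-1]  # highest combined score first
    ranked_resources = [embedded_resources[i] for i in order]
    ranked_classes = [class_indices[i] for i in order]
    return ranked_resources, ranked_classes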
def retrieving(self, prox, ir):
    """
    Calls the retrieve function in the retrieval module to find
    wheel speeds for retrieving the food source.
    """
    # Calculate new wheel speeds based on the IR sensor values.
    retrieve.retrieve(ir, self.IR_THRESHOLD)
    self.set_wheel_speeds(retrieve.get_left_wheel_speed(),
                          retrieve.get_right_wheel_speed())
    self.do_timed_action(self.timestep_duration)
    # Update the time since the last stagnation review.
    self.time_since_last_review += self.timestep_duration
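# The retrieve module used by the two retrieving() methods above is not shown.
# A minimal sketch of the interface they rely on: module-level state written by
# retrieve() and read back via the two getters. The steering rule itself is a
# hypothetical IR-following controller, not the original logic:
_left_speed = 0.0
_right_speed = 0.0

def retrieve(ir, threshold):
    global _left_speed, _right_speed
    # Steer toward the stronger IR reading; drive straight while both
    # sides are below the threshold. Assumes ir is an indexable sequence
    # with the leftmost and rightmost sensors at the ends.
    left_ir, right_ir = ir[0], ir[-1]
    if left_ir > threshold or right_ir > threshold:
        _left_speed = 1.0 if right_ir >= left_ir else 0.3
        _right_speed = 1.0 if left_ir > right_ir else 0.3
    else:
        _left_speed = _right_speed = 1.0

def get_left_wheel_speed():
    return _left_speed

def get_right_wheel_speed():
    return _right_speed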
def clean_database():
    """
    Clean ec_students_[semester] and ec_classes_[semester] tables
    :return: none
    """
    conn = mysql.connector.connect(**settings.MYSQL_CONFIG)
    cursor = conn.cursor()
    semester_code = get_semester_code_for_db(settings.SEMESTER)
    # Table names cannot be bound as query parameters, so they are
    # interpolated; semester_code comes from settings, not user input.
    cursor.execute("TRUNCATE ec_students_%s" % semester_code)
    cursor.execute("TRUNCATE ec_classes_%s" % semester_code)
    cursor.close()
    conn.close()


if __name__ == "__main__":
    with open("stu_data_version.json") as f:
        old_json_file = json.load(f)["stu_data_json_name"]
    fix_json(old_json_file)
    clean_directory()
    retrieve()
    clean_database()
    process()
    verify()