Ejemplo n.º 1
0
def clean_directory():
    """
    Clean raw_data directory

    Removes every entry listed in ``<current working directory>/raw_data``
    with ``os.remove`` and then calls ``retrieve()``.

    :return: none
    """
    raw_data_dir = os.path.join(os.getcwd(), 'raw_data')
    for each in os.listdir(raw_data_dir):
        # os.listdir() yields bare names, not paths. Join each name with the
        # directory so the file inside raw_data is removed, instead of a
        # same-named path relative to the current working directory.
        os.remove(os.path.join(raw_data_dir, each))
    retrieve()
Ejemplo n.º 2
0
def getDrops():
    """
    Look up the drops list for the current request.

    Reads ``lng``, ``lat`` and ``user_id`` from ``request.query``, passes them
    to ``retrieve.retrieve`` and returns the result converted with ``str``.
    """
    query = request.query
    lng, lat, user_id = query.lng, query.lat, query.user_id
    # last update
    return str(retrieve.retrieve(lng, lat, user_id))
Ejemplo n.º 3
0
def main():
	"""
	Interactively collect the download options and start the download.

	Prompts on stdin for a subject (one of ``payload.subject``), a time range
	given as two six-digit integers, an optional keyword and, if requested,
	login credentials. It then calls ``download.download`` with the result of
	``retrieve.retrieve``.

	:return: 0 once ``download.download`` has returned
	"""
	subjects=sorted(payload.subject)
	session=requests.Session()
	while True:
		for number,name in enumerate(subjects,start=1):
			print("{:2d}.{}".format(number,name))
		try:
			choice=int(input("\nYour choice: "))
		except ValueError:
			print("Please try again.\n")
			continue
		# Reject 0 and negative numbers explicitly: subjects[choice-1] would
		# otherwise wrap around and silently select an entry from the end.
		if not 1 <= choice <= len(subjects):
			print("Please try again.\n")
			continue
		subject=subjects[choice-1]
		print("You have selected: "+subject)
		break
	print("Select the time range of the exam papers")
	print("Enter ctrl+C to exit")
	while True:
		try:
			from_date=int(input("From (Example:201306): "))
			to_date=int(input("To (Example:202001): "))
		except ValueError:
			print("Please try again.\n")
			continue
		print("You have selected the time range {} to {}\n.".format(str(from_date),str(to_date)))
		# Both bounds must be six digits long and in ascending order.
		if len(str(from_date)) == 6 and len(str(to_date)) == 6 and from_date <= to_date:
			break
		print("You have entered an invalied time range, please try again.")
	print("You can enter an optional keyword")
	keyword=input("Your keyword (N for none),default is none: ")
	if keyword == "N" or keyword == "":
		keyword=""
		print("Your keyword is none")
	else:
		keyword=keyword.lower()
		print("Your keyword is: "+keyword)
	want_login=input("Some files require login to be downloaded,would you like to login? y/N: ").lower()
	logged_in=want_login in ("y","yes")
	if logged_in:
		# The source had these two prompts fused into one masked, invalid
		# statement; they are restored as two separate inputs.
		username=input("Username: ")
		password=input("Password: ")
		session=login.Login(session,username,password).start()
	download.download(session,subject,retrieve.retrieve(subject,keyword,from_date,to_date),login=logged_in)
	return 0
Ejemplo n.º 4
0
def get_data(filename, data, embeddings, w2i, gensim_model, args):
    """
    Retrieves all data. Load it from a Pickle file if it exists, and create it
    otherwise.

    When ``filename`` exists, the result of ``data_utils.load_pickle`` is
    returned. Otherwise examples are built from ``data[:10]`` only, handed to
    ``save_data(filename, ...)`` and returned.

    Each built example is a tuple of two items: the selected resource,
    truncated/padded to ``args.max_length`` rows, and
    ``data_utils.clean_sentence(chat[i + 1])``.

    ``embeddings`` and ``w2i`` are accepted but not referenced in this body.
    Reads the module-level ``num_words`` as the padding value.
    """

    global num_words

    if os.path.exists(filename):
        all_examples = data_utils.load_pickle(filename)
    else:
        all_examples = []

        # NOTE(review): only the first 10 entries of `data` are processed and
        # then saved under `filename` — looks like a debugging cap; confirm.
        for example in tqdm(data[:10]):
            resources = []
            embedded_resources = []

            # The same two lists are passed for all four document types;
            # presumably get_resources appends to them in place — verify.
            data_utils.get_resources(example["documents"]["comments"],
                                     resources, embedded_resources)
            data_utils.get_resources(example["documents"]["fact_table"],
                                     resources, embedded_resources)
            data_utils.get_resources(example["documents"]["plot"], resources,
                                     embedded_resources)
            data_utils.get_resources(example["documents"]["review"], resources,
                                     embedded_resources)

            chat = example["chat"]

            # Slide over the chat: the three utterances before index i form
            # the context and chat[i + 1] is the response.
            # NOTE(review): chat[i] itself is in neither — confirm intended.
            for i in range(3, len(chat) - 1):
                last_utterances = chat[i - 3:i]
                response = chat[i + 1]

                if len(response) > 0:
                    exp = []
                    # NOTE(review): embedded_utterances (and embedded_context
                    # below) are not read again in this function.
                    embedded_utterances = [
                        data_utils.embed_sentence(utterance)
                        for utterance in last_utterances
                    ]
                    context, embedded_context = \
                        data_utils.get_context(last_utterances)

                    # Retrieve: Takes context and resources. Uses Word Mover's Distance
                    # to obtain relevant resource candidates.
                    similarities = retrieve(context, resources, gensim_model)

                    # Keep the embedded resource at the argmax of
                    # `similarities`, cut to its last args.max_length rows.
                    padd_resource = embedded_resources[np.argmax(
                        similarities)][-args.max_length:]
                    # Pad the end of the first axis up to args.max_length,
                    # filling with num_words; the second axis is untouched.
                    padd_resource = np.pad(
                        padd_resource,
                        ((0, args.max_length - len(padd_resource)), (0, 0)),
                        "constant",
                        constant_values=(num_words))

                    exp.append(padd_resource)
                    exp.append(data_utils.clean_sentence(chat[i + 1]))
                    all_examples.append(tuple(exp))
        save_data(filename, all_examples)
    return all_examples
def info(customer_name):
    """
    Print the record returned for ``customer_name`` and render ``info.html``.

    The record comes from ``retrieve(customer_name)``; its first three items
    are printed with the labels below and the whole record is passed to the
    template as ``info_list``.
    """
    record = retrieve(customer_name)
    print('info', record)
    # Index 3 of the record is deliberately not printed (it was commented out
    # in the original as 'points').
    for position, label in enumerate(('first name', 'last name', 'phone number')):
        print(label, record[position])
    return render_template('info.html', info_list=record)
Ejemplo n.º 6
0
def getSensorData(latitude, longitude):
  """
  Return a JSON string with the sensor readings for the given position.

  Builds ``retrieve(latitude, longitude)`` and queries it for the latest valid
  temperature, the latest valid gulley silt level and the nearest pothole.
  The result is serialised with ``json.dumps`` using a four-space indent.
  """
  sensors = retrieve(latitude, longitude)
  temp, tDist, tTime = sensors.getLatestValidTemperature()
  level, sDist, sTime = sensors.getLatestValidGulleySiltLevel()
  # getNearestPothole() must yield exactly one item.
  (distance,) = sensors.getNearestPothole()
  readings = {
    "temperature(C)": temp,
    "thermometerDistance(km)": tDist,
    "thermometerTime": tTime,
    "siltLevel(%)": level,
    "siltSensorDistance(km)": sDist,
    "siltSensorTime": sTime,
    "nearestPotholeDistance(km)": distance,
  }
  return json.dumps(readings, indent=4, separators=(',', ': '))
Ejemplo n.º 7
0
    def btn_clicked_extractfeat_shot(self):
        """
        Extract features for every image in ``self.filebasepath`` and save them.

        Stores the image count in ``self.imgnum`` and the extracted features in
        ``self.featall``. The features are written with ``np.savetxt`` to
        ``featall.txt`` next to the image directory, and ``lineEdit_2`` is then
        updated with a completion message.
        """
        # Number of images in the database directory.
        self.imgnum = len(os.listdir(self.filebasepath))

        extractor = retrieve()
        print('标记')

        database_images = extractor.load_image(self.filebasepath, self.imgnum)
        # The returned model object is not used further in this method.
        vgg_model = extractor.load_vgg16model()
        self.featall = extractor.extract_vgg16feat(database_images)

        feature_file = os.path.split(self.filebasepath)[0] + '/featall.txt'
        np.savetxt(feature_file, self.featall)

        translate = QtCore.QCoreApplication.translate
        self.lineEdit_2.setText(translate("MainWindow", '特征提取完成.....'))
Ejemplo n.º 8
0
 def __search(self):
     """
     Run ``retrieve`` on the chosen directory and show the outcome.

     Clears ``self.scr``, then reads the keywords and the directory path from
     their variables. An info box is shown when the path is not a directory
     or when ``retrieve`` returns a falsy result; otherwise the result is
     inserted into ``self.scr``.
     """
     self.scr.delete(1.0, END)
     search_terms = self.keywordsvar.get()
     target_dir = self.pathvar.get()
     if os.path.isdir(target_dir):
         matches = retrieve(target_dir, search_terms)
         if matches:
             self.scr.insert("insert", matches)
         else:
             messagebox.showinfo("result", "Nothing to be found.")
     else:
         messagebox.showinfo(
             "implicate",
             target_dir + " is not a directory, please choose a directory.")
Ejemplo n.º 9
0
    def segment(self, data, XMLin=False, XMLout=False):
        """
        Return the result for ``data`` from the database, else from ``callCRF``.

        :param data: input text; when ``XMLin`` is true it is expected to
            contain a ``<term>...</term>`` element whose content is used
        :param XMLin: extract the input from the first ``<term>`` element
        :param XMLout: wrap the result in ``<answer org="THU">...</answer>``
        :return: the result string, or ``'Error format.'`` when ``XMLin`` is
            true and no ``<term>`` element is found
        """
        if XMLin:
            # Non-greedy search: takes the content of the first <term> element.
            match = re.search('<term>(.*?)</term>', data)
            if match:
                data = match.group(1)
                # NOTE(review): '(.*?)' is a lazy capture group that can match
                # the empty string, not literal parentheses. Its effect on
                # `data` depends on how re.sub treats empty matches in the
                # running Python version. If the intent was to strip
                # parenthesised text, the parentheses need escaping — confirm.
                data = re.sub('(.*?)', '', data)
                data = data.strip()
            else:
                return 'Error format.'

        #print(data)
        # Try the lookup first; fall back to callCRF when it returns a falsy value.
        result = retrieve(data, self.database, self.answer)
        if result:
            #print('Matched in database.')
            pass
        else:
            result = callCRF(data)

        if XMLout:
            result = '<answer org="THU">' + result + '</answer>'
        return result
Ejemplo n.º 10
0
    def btn_clicked_selectquery_shot(self):
        """
        Let the user pick a query image, extract its features and display it.

        Sets ``self.QueryImgPath``, ``self.ImgName``, ``self.QueryImg``,
        ``self.featall`` (loaded from ``featall.txt`` next to the image
        directory), ``self.feat`` and ``self.image``. The chosen image is shown
        in ``label_5``.
        """
        selection = QFileDialog.getOpenFileName(
            None, "选择一个查询文件", ".", "Image Files(*.jpg *.jpeg *.png)")
        self.QueryImgPath = selection[0]

        # Sorted list of the file names in the image database directory.
        self.ImgName = sorted(os.listdir(self.filebasepath))
        # File name of the query image, without its directory.
        self.QueryImg = os.path.split(self.QueryImgPath)[1]

        # Load the stored features of the database images.
        feature_file = os.path.split(self.filebasepath)[0] + '/featall.txt'
        self.featall = np.loadtxt(feature_file, dtype=np.float32)

        # Extract the deep features of the query image.
        extractor = retrieve()
        query_data = extractor.load_image(self.QueryImgPath, 1)
        # The returned model object is not used further in this method.
        vgg_model = extractor.load_vgg16model()
        self.feat = extractor.extract_vgg16feat(query_data)
        print('查询图片特征提取完成', self.feat.shape)

        self.image = QImage(self.QueryImgPath)
        query_label = self.label_5
        query_label.setPixmap(QPixmap.fromImage(self.image))
        query_label.setScaledContents(True)
Ejemplo n.º 11
0

if __name__ == "__main__":
	"""The crawler's main entry point."""
	# Use the retrieve module's search timeout as the default for new sockets.
	socket.setdefaulttimeout(retrieve.search_timeout)
	q = urlqueue.URLQueue()
	loader = downloader.Downloader()
	file_mgr = filemgr.FileManager()
	
	opt_n, opt_terms = opts.obtain_opts()
	# current_n is not read again in this block; presumably it is used by
	# code elsewhere in the file — verify before removing.
	current_n = opt_n
	file_mgr.init(opt_n)

	logging.info("start!")
	# Retrieve results from google
	root_urls = retrieve.retrieve(opt_terms)
	q.add(root_urls, FLOAT_MAX)	
	# Keep going while URLs remain queued and file_mgr.check() is truthy.
	while q.size() > 0 and file_mgr.check():
		# Take up to THREAD_COUNT entries off the queue for this round.
		url_ls = []
		for i in range(THREAD_COUNT):
			if q.size() > 0:
				url_ls.append(q.pop())
		# Start crawl for each popped entry, re-checking file_mgr before each
		# start; join() presumably waits for the started work — confirm.
		for tmp in url_ls:
			if file_mgr.check():
				loader.start(xtarget = crawl, kwargs = {"url": tmp['url'], "link_score": tmp['score'], "search_terms": opt_terms})
		loader.join()

		# Single-threaded variant, kept for reference:
		# tmp = q.pop()
		# crawl(tmp['url'], tmp['score'], opt_terms)

		q.sort()
Ejemplo n.º 12
0
 def pull(s, url=None):
     """
     Call ``retrieve`` for a URL and store the result's ``data`` on ``s``.

     :param s: object that receives the result in ``s.data``; its ``item``
         attribute supplies the URL when none is given
     :param url: URL passed to ``retrieve``; ``None`` (the default) means
         use ``s.item``
     :return: none
     """
     # Identity test: ``== None`` would go through a custom ``__eq__`` on url.
     if url is None:
         url = s.item
     r = retrieve(url=url)
     s.data = r.data
Ejemplo n.º 13
0
 def retrieving(self, prox, ir):
     """
     Drive one retrieval step using the wheel speeds from the retrieve module.

     Calls ``retrieve.retrieve`` with the IR readings and ``self.IR_THRESHOLD``,
     applies the left/right wheel speeds it exposes, and runs a timed action
     of 1.0. ``prox`` is not used in this method.
     """
     retrieve.retrieve(ir, self.IR_THRESHOLD)
     left_speed = retrieve.get_left_wheel_speed()
     right_speed = retrieve.get_right_wheel_speed()
     self.set_wheel_speeds(left_speed, right_speed)
     self.do_timed_action(1.0)
Ejemplo n.º 14
0
    def btn_retrieve_shot(self):
        """
        Rank the database images against the query and show the top results.

        Calls ``visual_result`` on a fresh ``retrieve()`` object with the query
        and database state stored on ``self``. It prints the query name and the
        first ten ranked names, then fills ``label_7`` .. ``label_21`` with the
        ranked images. ``self.image`` ends up holding the last image loaded.

        A label whose rank position is beyond the end of the rank list is
        cleared instead of raising ``IndexError``.
        """
        ox = retrieve()
        print(self.QueryImg)
        ranklist = ox.visual_result(self.filebasepath, self.QueryImg,
                                    self.ImgName, self.feat, self.featall)
        print(ranklist[:10])

        # (label number, rank position) pairs, in the original assignment
        # order. The mapping is not sequential; presumably it follows the
        # on-screen layout of the labels — confirm against the UI file.
        label_to_rank = (
            (7, 0), (8, 1), (9, 2), (10, 5), (11, 6),
            (12, 7), (13, 12), (14, 11), (15, 10), (16, 8),
            (17, 13), (18, 3), (19, 4), (20, 9), (21, 14),
        )
        available = len(ranklist)
        for label_number, rank in label_to_rank:
            label = getattr(self, 'label_{}'.format(label_number))
            if rank >= available:
                # Fewer results than slots: drop any image left over from a
                # previous query so it is not mistaken for a current result.
                label.clear()
                continue
            self.image = QImage(self.filebasepath + '/' + ranklist[rank])
            label.setPixmap(QPixmap.fromImage(self.image))
            label.setScaledContents(True)
Ejemplo n.º 15
0
from common import *
import sys

from retrieve import retrieve
from extract import extract_resource
from combine import combine_resource
from cleanup import cleanup_resource
from validate import validate_resource

if __name__ == '__main__':
    # Debug entry point: force every pipeline stage for the single resource
    # whose id is given as the first command-line argument.
    DEBUG_RESOURCE = sys.argv[1]
    engine = db_connect()
    source_table = sl.get_table(engine, 'source')

    def _reload_row():
        # Re-read the source row so each stage sees the previous stage's changes.
        return sl.find_one(engine, source_table, resource_id=DEBUG_RESOURCE)

    # retrieve takes the row first; the remaining stages take it last.
    row = _reload_row()
    retrieve(row, engine, source_table, force=True)
    for _stage in (extract_resource, combine_resource,
                   cleanup_resource, validate_resource):
        row = _reload_row()
        _stage(engine, source_table, row, force=True)

Ejemplo n.º 16
0
 def retrieve_one_seq(dataobj, seq_name, model, output_dir):
     """
     Run ``retrieve`` for one sequence, writing into its own sub-directory.

     Looks up the sample index for ``seq_name`` in
     ``dataobj.sequence_to_sample_idx``, creates ``output_dir/seq_name`` if
     needed, and calls ``retrieve`` inside ``torch.no_grad()``.
     """
     vdidx = dataobj.sequence_to_sample_idx[seq_name]
     seq_output_dir = os.path.join(output_dir, seq_name)
     os.makedirs(seq_output_dir, exist_ok=True)
     # NOTE(review): `self` is not a parameter of this function. It must come
     # from an enclosing scope that is not visible here — confirm.
     with torch.no_grad():
         retrieve(dataobj, vdidx, model, seq_output_dir, logger=self.logger)
Ejemplo n.º 17
0
Archivo: main.py Proyecto: Caiit/IR2
def run(data, word2vec):
    """
    Retrieve, rerank, rewrite.

    For every chat position with a non-empty response, the context is used to
    score the example's resources and the resources are reranked with a class
    prediction. The best resource is combined with the best template into a
    response, which is scored against the reference with ROUGE and BLEU. The
    averaged scores are printed at the end.

    Reads the module-level ``args`` and ``device``.

    :param data: iterable of examples; each is indexed with ``"documents"``
        (holding ``"comments"``, ``"fact_table"``, ``"plot"`` and
        ``"review"``) and ``"chat"``
    :param word2vec: model passed to ``retrieve`` with the context and the
        resources
    :return: none; the averaged metrics are printed
    """

    global device

    emb_size = len(data_utils.embeddings[0])
    SOS_token = torch.Tensor([i for i in range(emb_size)
                              ]).unsqueeze(0).to(device)
    EOS_token = torch.Tensor([i + 1 for i in range(emb_size)
                              ]).unsqueeze(0).to(device)
    w2emb = data_utils.load_w2emb(args.w2emb)
    w2emb["SOS_token"] = SOS_token.cpu()
    w2emb["EOS_token"] = EOS_token.cpu()

    # Cut every template to its last args.max_length rows, then pad the end
    # of the first axis back up to that length with len(data_utils.w2i).
    templates = data_utils.load_templates(args.templates)
    templates = [[temp[-args.max_length:] for temp in part_templ]
                 for part_templ in templates]
    templates = [[
        np.pad(temp2, ((0, args.max_length - len(temp2)), (0, 0)),
               "constant",
               constant_values=(len(data_utils.w2i))) for temp2 in temp1
    ] for temp1 in templates]
    templates = [torch.Tensor(class_tm) for class_tm in templates]
    rewrite = Rewrite(args.saliency_model, args.rewrite_model,
                      data_utils.embeddings, data_utils.w2i, SOS_token,
                      EOS_token, templates, w2emb, device)
    prediction = ResourcePrediction(args.prediction_model_folder)

    rouge = Rouge()
    total = 0
    avg_rouge1 = 0
    avg_rouge2 = 0
    avg_rougeL = 0
    avg_bleu = 0

    smooth = SmoothingFunction()

    for example in tqdm(data):
        resources = []
        embedded_resources = []
        class_indices = []

        # The growth of `resources` after each call gives the number of
        # resources contributed by that document type.
        data_utils.get_resources(example["documents"]["comments"], resources,
                                 embedded_resources)
        num_comments = len(resources)
        data_utils.get_resources(example["documents"]["fact_table"], resources,
                                 embedded_resources)
        num_facts = len(resources) - num_comments
        data_utils.get_resources(example["documents"]["plot"], resources,
                                 embedded_resources)
        num_plots = len(resources) - num_comments - num_facts
        data_utils.get_resources(example["documents"]["review"], resources,
                                 embedded_resources)
        num_reviews = len(resources) - num_comments - num_facts - num_plots

        # Keep track of where each resource originated from.
        class_indices += [2] * num_comments
        class_indices += [3] * num_facts
        class_indices += [0] * num_plots
        class_indices += [1] * num_reviews

        chat = example["chat"]

        # Slide over the chat: the three utterances before index i are the
        # context and chat[i + 1] is the reference response.
        # NOTE(review): chat[i] itself is in neither — confirm intended.
        for i in range(3, len(chat) - 1):
            last_utterances = chat[i - 3:i]
            response = chat[i + 1]

            if len(response) > 0:
                embedded_utterances = [
                    data_utils.embed_sentence(utterance)
                    for utterance in last_utterances
                ]
                # Only the plain context is needed here; the embedded
                # context returned alongside it is discarded.
                context, _ = data_utils.get_context(last_utterances)

                # Retrieve: Takes context and resources. Uses Word Mover's
                # Distance to obtain relevant resource candidates.
                similarities = retrieve(context, resources, word2vec)

                # Predict: Takes context and predicts the category of the
                # resource. Take the maximum length as max and pad the context
                # to maximum length if it is too short.
                if args.use_gensim:
                    constant_values = len(data_utils.embeddings.index2word)
                else:
                    constant_values = len(data_utils.w2i)

                # NOTE(review): index -2 selects the middle of the three
                # context utterances, not the most recent one — confirm.
                last_utterance = embedded_utterances[-2]
                padded_utterance = last_utterance[-args.max_length:]
                padded_utterance = np.pad(
                    padded_utterance,
                    ((0, args.max_length - len(padded_utterance)), (0, 0)),
                    "constant",
                    constant_values=(constant_values))
                if args.prediction:
                    predicted = prediction.predict(
                        np.expand_dims(padded_utterance, 0))
                else:
                    # No predictor: use equal weights for the four classes.
                    predicted = np.array([[0.25, 0.25, 0.25, 0.25]])

                # Rerank Resources: Takes ranked resource candidates and class
                # prediction and reranks them.
                ranked_resources, ranked_classes = rerank(
                    embedded_resources, class_indices, similarities, predicted)

                # Rerank Templates: Takes best resource and ranks the templates
                # accordingly. Returns the best template.
                best_resource, best_template = rewrite.rerank(
                    ranked_resources[0], ranked_classes[0])

                # Rewrite: Takes the best resource and best template and
                # rewrites them into a single response.
                best_response = rewrite.rewrite(best_resource, best_template)
                total += 1
                rouge_scores = rouge.get_scores(best_response, response)[0]
                avg_rouge1 += rouge_scores["rouge-1"]["f"]
                avg_rouge2 += rouge_scores["rouge-2"]["f"]
                avg_rougeL += rouge_scores["rouge-l"]["f"]
                avg_bleu += sentence_bleu([response],
                                          best_response,
                                          smoothing_function=smooth.method1)

    if total == 0:
        # No chat position had a non-empty response, so there is nothing to
        # average; dividing by total would raise ZeroDivisionError.
        print("No responses were evaluated; no averages to report.")
        return

    print("Average rouge1: " + str(avg_rouge1 / total))
    print("Average rouge2: " + str(avg_rouge2 / total))
    print("Average rougel: " + str(avg_rougeL / total))
    print("Average bleu: " + str(avg_bleu / total))
Ejemplo n.º 18
0
 def retrieving(self, prox, ir):
     """
     Drive one retrieval step and advance the stagnation-review timer.

     Calls ``retrieve.retrieve`` with the IR readings and ``self.IR_THRESHOLD``,
     applies the left/right wheel speeds it exposes, and runs a timed action
     of ``self.timestep_duration``. That duration is then added to
     ``self.time_since_last_review``. ``prox`` is not used in this method.
     """
     # Calculate new wheel speeds based on IR sensor values
     retrieve.retrieve(ir, self.IR_THRESHOLD)
     left_speed = retrieve.get_left_wheel_speed()
     right_speed = retrieve.get_right_wheel_speed()
     self.set_wheel_speeds(left_speed, right_speed)
     self.do_timed_action(self.timestep_duration)
     # update time since last stagnation review
     self.time_since_last_review += self.timestep_duration
Ejemplo n.º 19
0
def clean_database():
    """
    Clean ec_students_[semester] and ec_classes_[semester] table

    Connects with ``settings.MYSQL_CONFIG`` and issues a ``TRUNCATE`` for both
    tables, where ``[semester]`` is
    ``get_semester_code_for_db(settings.SEMESTER)``. The cursor and the
    connection are closed even when a statement fails.

    :return: none
    """
    semester_code = get_semester_code_for_db(settings.SEMESTER)
    conn = mysql.connector.connect(**settings.MYSQL_CONFIG)
    try:
        cursor = conn.cursor()
        try:
            for table_prefix in ("ec_students_", "ec_classes_"):
                # The table name is interpolated because identifiers cannot
                # be bound as query parameters; the value comes from
                # project settings, not from user input.
                cursor.execute("TRUNCATE %s%s" % (table_prefix, semester_code))
        finally:
            cursor.close()
    finally:
        conn.close()


if __name__ == "__main__":
    # Read the name of the previous student-data JSON file from the version
    # descriptor.
    with open("stu_data_version.json") as f:
        version_info = json.load(f)
    old_json_file = version_info["stu_data_json_name"]

    # Run the pipeline steps in their fixed order.
    fix_json(old_json_file)
    clean_directory()
    retrieve()
    clean_database()
    process()
    verify()