def batchCalcTimePrefScore():
    """Batch work process.

    Access the DB's predicttokens and tasks collections, calculate each
    task's time preference score, and save it back to the DB.
    """
    from pymongo import MongoClient
    import tokenizer

    # CONST_DB_ADDR, CONST_DB_PORT, and getTimePrefScore() are expected to
    # be defined elsewhere in this module.
    client = MongoClient(CONST_DB_ADDR, CONST_DB_PORT)
    db = client.test
    task_collection = db.tasks
    token_collection = db.predicttokens

    for c in task_collection.find():
        # Run for all tasks. We may want to run only for non-completed
        # tasks, because they are the only tasks for which the time
        # preference score matters. However, for now we don't have enough
        # data, so we use every task even if it is already completed.
        # Get the score for all time slots.
        content = c['name'] + ' ' + c['description']
        tokens = tokenizer.extractor(content)
        score = getTimePrefScore(c['userId'], token_collection, tokens)
        task_collection.update_one({'_id': c['_id']},
                                   {'$set': {'timePreferenceScore': score}})
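# A minimal sketch of running the batch job above, assuming a local MongoDB
# on the default port. CONST_DB_ADDR and CONST_DB_PORT are referenced by
# batchCalcTimePrefScore() and, if not already defined elsewhere, could be
# set like this (illustrative values only).
CONST_DB_ADDR = 'localhost'  # assumption: MongoDB host
CONST_DB_PORT = 27017        # assumption: MongoDB default port

if __name__ == '__main__':
    # A scheduler (e.g. cron) could invoke this periodically to keep the
    # timePreferenceScore field of every task fresh.
    batchCalcTimePrefScore()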
def tokenizer_execute(language, page_html, link):
    # Build the pipeline: raw tokens -> semantic tokens -> extractor.
    obj = tokenizer.tokenizer(language)
    obj.generate_tokens()
    obj2 = tokenizer.semantic_tokenizer(obj.tokens)
    obj2.generate_tokens()
    obj3 = tokenizer.extractor(obj.tokens, obj2.semantic_tokens)
    if not page_html:
        # No cached HTML: fetch the page from the link before extracting.
        return obj3.start_extract(link)
    # Cached HTML available: extract directly without fetching.
    return obj3.start_extract_without_fetch(page_html)
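# A usage sketch for tokenizer_execute(), assuming the tokenizer module's
# API shown above; the language value, URL, and HTML snippet here are all
# hypothetical placeholders.
result_from_url = tokenizer_execute('english', None, 'http://example.com/article')
cached_html = '<html><body><p>Sample text.</p></body></html>'
result_from_html = tokenizer_execute('english', cached_html, None)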
def main():
    import sys
    import json

    result = {}
    try:
        result["tokens"] = tokenizer.extractor(sys.argv[1])
    except Exception:
        # sys.exc_info()[0] is the exception type, not a string; convert it
        # before writing the error log, then exit with a failure status.
        with open('errlog.txt', 'w') as f:
            f.write(str(sys.exc_info()[0]))
        sys.exit(1)
    print(json.dumps(result))
    sys.exit(0)
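# A sketch of how a caller might consume main()'s JSON output over stdout;
# the script name 'tokenize_cli.py' is a placeholder for whichever file
# holds main() above.
import json
import subprocess

proc = subprocess.run(['python', 'tokenize_cli.py', 'some text to tokenize'],
                      capture_output=True, text=True)
if proc.returncode == 0:
    tokens = json.loads(proc.stdout)['tokens']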
def updateSyllabusToFirebase():
    text = extractor(reader(g.file).full_text).final_list
    db = firebase.database()
    summaries = []
    for i in text:
        topics = i.split(",")
        for j in topics:
            te = getContent(j)
            data = {"topic": j, "isVideo": te.isVideo, "summary": te.summary}
            # Push each topic under syllabus/<year>/<branch>/<subject>.
            db.child("syllabus").child(g.year_).child(g.branch_).child(g.subject_).push(data)
            summaries.append(te.summary)
    with open("new.txt", "w") as outfile:
        outfile.write("\n".join(summaries))
    print("DONE!!")
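# A read-back sketch, assuming the same pyrebase-style `firebase` handle the
# chained .child().push() calls above suggest; the year/branch/subject keys
# ("2", "CSE", "DBMS") are placeholder values, not taken from the original.
records = (firebase.database()
           .child("syllabus").child("2").child("CSE").child("DBMS")
           .get())
for item in records.each() or []:
    entry = item.val()
    print(entry["topic"], "video" if entry["isVideo"] else "text")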