Example 1
def batchCalcTimePrefScore():
    """
    Batch work process.
    Access DB's predicttokens collection and task collection.
    Calculate score, and save it back to the DB.

    """
    from pymongo import MongoClient
    import tokenizer

    client = MongoClient(CONST_DB_ADDR, CONST_DB_PORT)
    # print client.database_names()
    db = client.test
    # print db.collection_names()

    task_collection = db.tasks
    token_collection = db.predicttokens
    for c in task_collection.find():
        # Run for all tasks.

        # We may want to run only for non-completed tasks, because those are
        # the only tasks for which the time preference score matters.
        # However, for now we don't have enough data, so we use every task,
        # even those that are already completed.

        # get score for all time slots.
        content = c['name'] + ' ' + c['description']
        tokens = tokenizer.extractor(content)
        score = getTimePrefScore(c['userId'], token_collection, tokens)
        task_collection.update_one({'_id': c['_id']}, {'$set': {'timePreferenceScore': score}})
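
The constants CONST_DB_ADDR / CONST_DB_PORT and the helper getTimePrefScore are defined elsewhere in the project and are not shown here. As a rough illustration only, a minimal sketch of what such a helper could look like, assuming each predicttokens document stores a per-user token together with a list of per-time-slot scores (the field names 'token' and 'slotScores' are hypothetical):

def getTimePrefScore(user_id, token_collection, tokens):
    # Sketch under an assumed schema: average the stored per-slot scores of the
    # tokens that match this user. 'token' and 'slotScores' are hypothetical fields.
    NUM_SLOTS = 24  # assumed number of time slots per day
    totals = [0.0] * NUM_SLOTS
    matched = 0
    for doc in token_collection.find({'userId': user_id, 'token': {'$in': tokens}}):
        for i, s in enumerate(doc.get('slotScores', [])[:NUM_SLOTS]):
            totals[i] += s
        matched += 1
    if matched == 0:
        return [0.0] * NUM_SLOTS
    return [t / matched for t in totals]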
Example 2
def batchCalcTimePrefScore():
    """
    Batch work process.
    Access DB's predicttokens collection and task collection.
    Calculate score, and save it back to the DB.

    """
    from pymongo import MongoClient
    import tokenizer

    client = MongoClient(CONST_DB_ADDR, CONST_DB_PORT)
    # print client.database_names()
    db = client.test
    # print db.collection_names()

    task_collection = db.tasks
    token_collection = db.predicttokens
    for c in task_collection.find():
        # Run for all tasks.

        # We may want to run only for non-completed tasks, because those are
        # the only tasks for which the time preference score matters.
        # However, for now we don't have enough data, so we use every task,
        # even those that are already completed.

        # get score for all time slots.
        content = c['name'] + ' ' + c['description']
        tokens = tokenizer.extractor(content)
        score = getTimePrefScore(c['userId'], token_collection, tokens)
        task_collection.update_one({'_id': c['_id']},
                                   {'$set': {
                                       'timePreferenceScore': score
                                   }})
Example 3
def tokenizer_execute(language, page_html, link):
    # Build the pipeline: raw tokens -> semantic tokens -> extractor.
    obj = tokenizer.tokenizer(language)
    obj.generate_tokens()
    obj2 = tokenizer.semantic_tokenizer(obj.tokens)
    obj2.generate_tokens()
    obj3 = tokenizer.extractor(obj.tokens, obj2.semantic_tokens)
    # If no HTML was supplied, let the extractor fetch the page from the link;
    # otherwise reuse the already-downloaded HTML.
    if not page_html:
        return obj3.start_extract(link)
    return obj3.start_extract_without_fetch(page_html)
Example 4
def main():
    import sys
    import json

    result = {}
    try:
        result["tokens"] = tokenizer.extractor(sys.argv[1])
    except Exception:
        # sys.exc_info()[0] is the exception class, not a string; convert it before writing.
        with open('errlog.txt', 'w') as f:
            f.write(str(sys.exc_info()[0]))
        sys.exit(1)

    print(json.dumps(result))
    sys.exit(0)
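
Because this entry point prints its result as JSON on stdout and reports failure through the exit code, it is presumably invoked by another process. A minimal sketch of such a caller, assuming the script file is named tokenize_cli.py (a hypothetical name):

import json
import subprocess

# Run the tokenizer script on a piece of text and parse its JSON output.
proc = subprocess.run(['python', 'tokenize_cli.py', 'Some text to tokenize'],
                      capture_output=True, text=True)
if proc.returncode == 0:
    tokens = json.loads(proc.stdout)['tokens']
else:
    print('tokenizer failed, see errlog.txt')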
Example 5
def main():
    import sys
    import json

    result = {}
    try:
        result["tokens"] = tokenizer.extractor(sys.argv[1])
    except Exception:
        # sys.exc_info()[0] is the exception class, not a string; convert it before writing.
        with open('errlog.txt', 'w') as f:
            f.write(str(sys.exc_info()[0]))
        sys.exit(1)

    print(json.dumps(result))
    sys.exit(0)
Example 6
def updateSyllabusToFirebase():
    text = extractor(reader(g.file).full_text).final_list

    db = firebase.database()

    # Push each topic's metadata to Firebase and collect the summaries locally.
    summaries = []
    for i in text:
        topics = i.split(",")
        for j in topics:
            te = getContent(j)
            data = {"topic": j, "isVideo": te.isVideo, "summary": te.summary}
            db.child("syllabus").child(g.year_).child(g.branch_).child(g.subject_).push(data)
            summaries.append(te.summary)

    with open("new.txt", "w") as outfile:
        outfile.write("\n".join(summaries))
    print("DONE!!")