Example 1
 def test_move_doc(self):
     obj = {"i": "tomove"}
     db = Couch('test')
     doc_id = db.distinct_insert(obj)
     selector = {'_id': doc_id}
     Couch('test').move_doc(selector, 'test2')
     query_result = Couch('test2').query(obj)
     self.assertTrue(len(query_result) > 0)
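
A note on what this test pins down: after move_doc, the document selected in 'test' becomes queryable in 'test2'. The sketch below only illustrates that contract, using the query, insert and delete calls shown elsewhere on this page; it is not the wrapper's actual implementation, and the handling of '_id'/'_rev' is an assumption.

def move_doc_sketch(source_db, selector, target_db_name):
    # Illustrative only: copy each matching document into the target
    # database, then remove it from the source. Assumes the Couch wrapper
    # and its query/insert/delete methods as used in the other examples.
    target = Couch(target_db_name)
    for doc in source_db.query(selector):
        doc.pop('_id', None)   # assumption: let the target assign a new id
        doc.pop('_rev', None)  # assumption: revision tokens are per-database
        target.insert(doc)
    source_db.delete(selector)
    target.close()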
Example 2
 def test_update(self):
     obj = {"abc": "1234", "def": {"abc": "4567"}}
     db = Couch('test')
     doc_id = db.distinct_insert(obj)
     selector = {'_id': doc_id}
     Couch('test').update(selector, 'def', {"abc": "5678"})
     res = Couch('test').query(selector)
     for item in res:
         self.assertEqual(item['def'], {"abc": "5678"})
Example 3
 def fetch_vector(info1, info2, database):
     selector = {
         'platform1': info1['platform'],
         'platform2': info2['platform'],
         'username1': info1['profile']['username'],
         'username2': info2['profile']['username']
     }
     database = Couch(database)
     query_res = database.query_latest_change(selector)
     return [_restore_float(x) for x in query_res]
Example 4
def import_directory_to_db(path, db_name):
    db = Couch(db_name)
    files = os.listdir(path)
    cnt = 0
    for filename in files:
        if cnt % 100 == 0:
            print('Processing {} of {} records.'.format(cnt, len(files)))
        cnt += 1
        import_file_to_db(path + "/" + filename, db)
    logger.info('{} records inserted.'.format(len(files)))
    db.close()
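
import_file_to_db is not shown in this listing. Below is a hedged sketch of what it could look like, assuming each file holds a single JSON document and reusing distinct_insert from the other examples; the real helper may parse a different format.

import json

def import_file_to_db(file_path, db):
    # Sketch under the assumption of one JSON document per file;
    # distinct_insert (see the test examples) skips exact duplicates.
    with open(file_path, 'r', encoding='utf-8') as f:
        doc = json.load(f)
    db.distinct_insert(doc)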
Example 5
 def test_retrieve_flickr_realtime_in_db(self):
     account = {'platform': 'Flickr', 'account': 'sakuranyochan'}
     retrieve(account, REALTIME_MODE)
     db = Couch('flickr')
     query_result = db.query({'profile': {'username': 'sakuranyochan'}})
     db.close()
     self.assertTrue(len(query_result) > 0)
     query_result = retrieve(account, REALTIME_MODE)
     for item in query_result:
         self.assertTrue('profile' in item.keys()
                         and item['profile']['username'] == 'sakuranyochan')
Example 6
 def store_result(info1, info2, vector, database):
     database = Couch(database)
     doc = {
         'platform1': info1['platform'],
         'platform2': info2['platform'],
         'username1': info1['profile']['username'],
         'username2': info2['profile']['username'],
         'vector': vector
     }
     logger.info('Storing result: {}'.format(doc))
     doc_id = database.distinct_insert(_convert_float(doc))
     database.close()
     return doc_id
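
store_result runs the document through _convert_float before inserting it, and fetch_vector in Example 3 reverses that with _restore_float. Neither helper is shown here; the pair below is one hypothetical round trip (floats stored as strings and parsed back), included only to make the data flow concrete. The real helpers may do something different.

def _convert_float(value):
    # Hypothetical: serialise floats as strings before storage.
    if isinstance(value, float):
        return repr(value)
    if isinstance(value, dict):
        return {k: _convert_float(v) for k, v in value.items()}
    if isinstance(value, list):
        return [_convert_float(v) for v in value]
    return value

def _restore_float(value):
    # Hypothetical inverse: parse numeric-looking strings back into floats.
    if isinstance(value, str):
        try:
            return float(value)
        except ValueError:
            return value
    if isinstance(value, dict):
        return {k: _restore_float(v) for k, v in value.items()}
    if isinstance(value, list):
        return [_restore_float(v) for v in value]
    return value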
Example 7
def query():
    """
        Request format:
            {'account1': {'platform':'xxx', 'account': 'aaa'}, 'account2': {'platform':'yyy', 'account': 'bbb'}}
        Response format:
            {'result': 0.123, 'doc_id': '5bea4d3efa3646879'}
    """
    data = json.loads(request.get_data().decode('utf-8'))
    account1 = data['account1']
    account2 = data['account2']
    score = query_existing_similarity_in_db(account1, account2)
    if len(score) == 0:
        try:
            info1 = retrieve(account1, mode=REALTIME_MODE)
            info2 = retrieve(account2, mode=REALTIME_MODE)
            vector = algoModule.calc(info1, info2, enable_networking=(account1['platform'] == account2['platform']),
                                     mode=REALTIME_MODE)
            doc_id = algoModule.store_result(info1, info2, vector, DATABASE_DATA_AWAIT_FEEDBACK)
            score = Couch(DATABASE_DATA_AWAIT_FEEDBACK).query({'_id': doc_id})
        except Exception as e:
            logger.error(e)
            return make_response({'error': True, 'error_message': str(e)})
    doc = score[0]
    doc_id = doc['_id']
    vector = doc['vector']
    overall_score = OverallSimilarityCalculator().calc(doc)
    return make_response({'result': vector, 'columns': column_names,
                          'score': str(overall_score), 'doc_id': doc_id,
                          'error': False})
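
The docstring fixes the request and response shapes, so a client call follows directly from it. Here is a minimal client sketch; the '/query' route, the host/port and the account values are placeholders, not taken from the source.

import json
import requests  # any HTTP client works; requests is used here for brevity

payload = {
    'account1': {'platform': 'Twitter', 'account': 'alice'},      # placeholder values
    'account2': {'platform': 'Instagram', 'account': 'alice_ig'}, # placeholder values
}
# The URL is an assumption; the actual route registration is not shown above.
resp = requests.post('http://localhost:5000/query', data=json.dumps(payload))
body = resp.json()
if body.get('error'):
    print('query failed:', body['error_message'])
else:
    print('score:', body['score'], 'doc_id:', body['doc_id'])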
Example 8
def apply_feedback(item):
    doc_id = item['doc_id']
    label = item['feedback']
    selector = {'_id': doc_id}
    db_name = DATABASE_DATA_AWAIT_FEEDBACK
    stored_records = Couch(db_name).query(selector)
    if stored_records:
        logger.info('Applying feedback to doc id {} in table {}.'.format(
            selector['_id'], db_name))
        item = stored_records[0]
        item['vector']['label'] = label
        Couch(db_name).update(selector, 'vector', item['vector'])
        Couch(db_name).move_doc(selector, DATABASE_LABELED_DATA)
        logger.info(
            'Applying feedback to doc id {} in table {} completed.'.format(
                selector['_id'], db_name))
Example 9
 def test_delete(self):
     obj = {"abc": "1234", "def": "4567"}
     selector = {"abc": "1234"}
     db = Couch('test')
     db.distinct_insert(obj)
     query_result = db.query(selector)
     self.assertTrue(len(query_result) > 0)
     db.delete(selector)
     query_result = db.query(selector)
     self.assertEqual(0, len(query_result))
Example 10
 def test_distinct_insert2(self):
     conn = Couch("test")
     test_doc = {"adsf": {"bbb": "fdsa", "aand": "ssss"}}
     for i in range(3):
         conn.distinct_insert(test_doc)
     query_result = conn.query(test_doc)
     conn.close()
     self.assertEqual(1, len(query_result))
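
Inserting the same document three times and still finding a single match pins down the contract of distinct_insert: insert only when an identical document is not already present. Below is a sketch of that contract expressed only with the query and insert calls used above; it is not the wrapper's real code, and the '_id' access mirrors Example 7.

def distinct_insert_sketch(db, doc):
    # Illustrative: skip the insert when an identical document already exists,
    # which is exactly what test_distinct_insert2 asserts.
    matches = db.query(doc)
    if not matches:
        db.insert(doc)
        matches = db.query(doc)
    return matches[0]['_id']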
Example 11
 def test_database_insert_and_partial_query(self):
     conn = Couch("test")
     test_doc = {"adsf": {"bbb": "fdsa", "aand": "ssss"}}
     conn.insert(test_doc)
     selector = {"asdf": {"bbb": "fdsa"}}
     res = conn.query(selector)
     conn.close()
     for item in res:
         self.assertEqual(item, test_doc)
Example 12
 def test_database_insert_and_query(self):
     conn = Couch("test")
     test_doc = {"abc": "def"}
     conn.insert(test_doc)
     selector = {"abc": "def"}
     res = conn.query(selector)
     conn.close()
     for item in res:
         self.assertEqual(item['abc'], 'def')
Example 13
def query_existing_similarity_in_db(account1, account2):
    database_order = [DATABASE_LABELED_DATA, DATABASE_DATA_AWAIT_FEEDBACK]
    account1 = __format_account_query(account1)
    account2 = __format_account_query(account2)
    selectors = [{
        'platform1': account1['platform'],
        'platform2': account2['platform'],
        'username1': account1['account'],
        'username2': account2['account']
    }, {
        'platform1': account2['platform'],
        'platform2': account1['platform'],
        'username1': account2['account'],
        'username2': account1['account']
    }]
    for db_name in database_order:
        for selector in selectors:
            database = Couch(db_name)
            query_res = database.query_latest_change(selector)
            if len(query_res) > 0:
                return [_restore_float(x) for x in query_res]
    return []
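
Because the two selectors cover both orderings of the account pair, callers do not need to care which account comes first. A small usage sketch with placeholder account values follows.

score = query_existing_similarity_in_db(
    {'platform': 'Twitter', 'account': 'alice'},       # placeholder
    {'platform': 'Instagram', 'account': 'alice_ig'})  # placeholder
if score:
    latest = score[0]
    print('cached similarity vector:', latest['vector'])  # 'vector' field as in Example 7
else:
    print('no stored similarity for this pair')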
Example 14
def login_account():
    data = json.loads(request.get_data().decode('utf-8'))
    platform = data['platform']
    username = data['username']
    password = decrypt(data['password'])
    res = False
    instance = None
    if len(username) == 0 and len(password) == 0:
        return make_response({'result': res})
    if platform == 'Instagram':
        instance = InsUtilsWithLogin(displayed=False)
    elif platform == 'Twitter':
        instance = TwiUtilsWithLogin(displayed=False)
    if instance is None:
        return make_response({'result': False})
    instance.set_account((username, password))
    res = instance.login()
    if res:
        database = Couch(DATABASE_CREDENTIAL)
        database.insert(data)
        database.close()

    return make_response({'result': res})
Example 15
def batch_feedback():
    feedback_await_batch = Couch(DATABASE_FEEDBACK).query_all()
    for feedback in feedback_await_batch:
        apply_feedback(feedback)
        doc_id = feedback['_id']
        Couch(DATABASE_FEEDBACK).delete({'_id': doc_id})
Example 16
def generate_model(mode, cross_features=False):
    logger.info('Start generating model in {} mode.'.format(
        'REALTIME' if mode == REALTIME_MODE else 'BATCH'))
    logger.info('Cross-feature generation {}.'.format(
        'enabled' if cross_features else 'disabled'))
    items = Couch(DATABASE_LABELED_DATA).query_all()
    items = list(filter(lambda x: 'label' in x['vector'].keys(), items))
    logger.info('Retrieved {} labelled data from the database.'.format(
        len(items)))
    feature_vectors = generate_feature_vectors(items, mode, cross_features)

    dataset = generate_dataset(feature_vectors, mode, cross_features)
    train_dataset = dataset.sample(frac=0.8, random_state=0)
    test_dataset = dataset.drop(train_dataset.index)

    train_stats = train_dataset.describe()
    train_stats.pop("label")
    train_stats = train_stats.transpose()
    export_stats(train_stats, mode, cross_features)
    logger.info('Exported training stats.')

    train_labels = train_dataset.pop('label')
    test_labels = test_dataset.pop('label')

    normed_train_data = norm(train_dataset, train_stats)
    normed_test_data = norm(test_dataset, train_stats)

    model = build_model(train_dataset)

    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    logger.info('Training...')
    history = model.fit(normed_train_data,
                        train_labels,
                        epochs=EPOCHS,
                        validation_split=0.2,
                        verbose=0,
                        callbacks=[early_stop, PrintDot()])
    print('')

    loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=0)
    logger.info("Evaluation of model:")
    logger.info("loss: {:5.2f}, mae: {:5.2f}, mse: {:5.2f}".format(
        loss, mae, mse))

    test_predictions = model.predict(normed_test_data).flatten()
    pred = [1.0 if x >= 0.5 else 0.0 for x in test_predictions]
    res = list(zip(test_labels, pred))
    # Raw confusion-matrix counts (the precision and recall formulas below expect counts).
    tp = len(list(filter(lambda x: x[0] == 1 and x[1] == 1, res)))
    fp = len(list(filter(lambda x: x[0] == 0 and x[1] == 1, res)))
    tn = len(list(filter(lambda x: x[0] == 0 and x[1] == 0, res)))
    fn = len(list(filter(lambda x: x[0] == 1 and x[1] == 0, res)))
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1 = 2 * precision * recall / (precision + recall)
    logger.info(
        "Precision: {:5.4f}, Recall: {:5.4f}, F1-score: {:5.4f}".format(
            precision, recall, f1))

    return model
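
build_model, norm, export_stats and generate_dataset are defined elsewhere. For orientation only, here is a hedged sketch of a build_model consistent with the unpacking of model.evaluate into loss, mae and mse above; the layer sizes, optimiser, loss and the tensorflow.keras import path are assumptions.

from tensorflow import keras
from tensorflow.keras import layers

def build_model(train_dataset):
    # Sketch only: a small dense network for the 0/1 similarity label.
    # The two metrics match the "loss, mae, mse" unpacking in generate_model;
    # everything else here is an assumption.
    model = keras.Sequential([
        layers.Dense(64, activation='relu',
                     input_shape=[len(train_dataset.keys())]),
        layers.Dense(64, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(loss='mse',
                  optimizer=keras.optimizers.RMSprop(0.001),
                  metrics=['mae', 'mse'])
    return model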
Example 17
 def test_query_latest(self):
     conn = Couch("test")
     selector = {"abc": "def"}
     res = conn.query_latest_change(selector)
     conn.close()
     self.assertEqual(1, len(res))