def test_move_doc(self):
    """Moving a doc from 'test' to 'test2' should make it queryable in 'test2'."""
    obj = {"i": "tomove"}
    db = Couch('test')
    doc_id = db.distinct_insert(obj)
    selector = {'_id': doc_id}
    # Reuse the existing connection instead of opening a second one,
    # and close both connections (they were previously leaked).
    db.move_doc(selector, 'test2')
    db.close()
    db2 = Couch('test2')
    query_result = db2.query(obj)
    db2.close()
    self.assertTrue(len(query_result) > 0)
def test_update(self):
    """update() should replace the named field's value in the matched doc."""
    obj = {"abc": "1234", "def": {"abc": "4567"}}
    db = Couch('test')
    doc_id = db.distinct_insert(obj)
    selector = {'_id': doc_id}
    # Reuse one connection (three were opened and leaked before).
    db.update(selector, 'def', {"abc": "5678"})
    res = db.query(selector)
    db.close()
    # Guard against a vacuous pass when the query returns nothing.
    self.assertTrue(len(res) > 0)
    for item in res:
        self.assertEqual(item['def'], {"abc": "5678"})
def fetch_vector(info1, info2, database):
    """Fetch the latest stored similarity vector for an account pair.

    Args:
        info1: retrieved account info dict with 'platform' and
            'profile'['username'] keys.
        info2: same shape as info1, for the second account.
        database: name of the database to query.

    Returns:
        List of matching docs (latest change) with floats restored.
    """
    selector = {
        'platform1': info1['platform'],
        'platform2': info2['platform'],
        'username1': info1['profile']['username'],
        'username2': info2['profile']['username'],
    }
    # Use a distinct local name instead of shadowing the `database`
    # parameter, and close the connection (it was leaked before).
    connection = Couch(database)
    query_res = connection.query_latest_change(selector)
    connection.close()
    return [_restore_float(x) for x in query_res]
def import_directory_to_db(path, db_name):
    """Import every file under *path* into the database *db_name*.

    Logs progress every 100 files and the total count when done.
    """
    db = Couch(db_name)
    files = os.listdir(path)
    total = len(files)
    # enumerate() replaces the manual counter; os.path.join replaces
    # fragile "path + '/' + filename" concatenation.
    for cnt, filename in enumerate(files):
        if cnt % 100 == 0:
            print('Processing {} of {} records.'.format(cnt, total))
        import_file_to_db(os.path.join(path, filename), db)
    logger.info('{} records inserted.'.format(total))
    db.close()
def test_retrieve_flickr_realtime_in_db(self):
    """Realtime retrieve should persist Flickr data and return the profile."""
    account = {'platform': 'Flickr', 'account': 'sakuranyochan'}
    retrieve(account, REALTIME_MODE)
    db = Couch('flickr')
    # NOTE(review): '******' looks like a redacted username — confirm this
    # selector actually matches the stored profile document.
    query_result = db.query({'profile': {'username': '******'}})
    db.close()
    self.assertTrue(len(query_result) > 0)
    query_result = retrieve(account, REALTIME_MODE)
    for item in query_result:
        self.assertTrue('profile' in item.keys()
                        and item['profile']['username'] == 'sakuranyochan')
def store_result(info1, info2, vector, database):
    """Persist a similarity vector for an account pair.

    Builds a doc keyed by both platforms/usernames, converts its floats
    for storage, and inserts it (deduplicated) into *database*.

    Returns:
        The id of the inserted document.
    """
    connection = Couch(database)
    doc = {
        'platform1': info1['platform'],
        'platform2': info2['platform'],
        'username1': info1['profile']['username'],
        'username2': info2['profile']['username'],
        'vector': vector,
    }
    logger.info('Storing result: {}'.format(doc))
    doc_id = connection.distinct_insert(_convert_float(doc))
    connection.close()
    return doc_id
def query():
    """
    Request format: {'account1': {'platform':'xxx', 'account': 'aaa'},
    'account2': {'platform':'yyy', 'account': 'bbb'}}
    Response format: {'result': 0.123, 'doc_id': '5bea4d3efa3646879'}
    """
    data = json.loads(request.get_data().decode('utf-8'))
    account1 = data['account1']
    account2 = data['account2']
    score = query_existing_similarity_in_db(account1, account2)
    if len(score) == 0:
        # No cached similarity: compute one in realtime and persist it.
        try:
            info1 = retrieve(account1, mode=REALTIME_MODE)
            info2 = retrieve(account2, mode=REALTIME_MODE)
            # Networking features only make sense within one platform.
            vector = algoModule.calc(
                info1, info2,
                enable_networking=(account1['platform'] == account2['platform']),
                mode=REALTIME_MODE)
            doc_id = algoModule.store_result(info1, info2, vector,
                                             DATABASE_DATA_AWAIT_FEEDBACK)
            db = Couch(DATABASE_DATA_AWAIT_FEEDBACK)
            score = db.query({'_id': doc_id})
            db.close()  # connection was previously leaked
        except Exception as e:
            # logger.exception keeps the traceback (logger.error(e) dropped it).
            logger.exception(e)
            return make_response({'error': True, 'error_message': str(e)})
    doc = score[0]
    doc_id = doc['_id']
    vector = doc['vector']
    overall_score = OverallSimilarityCalculator().calc(doc)
    return make_response({'result': vector,
                          'columns': column_names,
                          'score': str(overall_score),
                          'doc_id': doc_id,
                          'error': False})
def apply_feedback(item):
    """Attach a user feedback label to a stored similarity vector.

    Looks up the doc referenced by item['doc_id'] in the await-feedback
    table, writes item['feedback'] into its vector as 'label', then moves
    the doc to the labeled-data table.
    """
    doc_id = item['doc_id']
    label = item['feedback']
    selector = {'_id': doc_id}
    db_name = DATABASE_DATA_AWAIT_FEEDBACK
    # One shared connection instead of three separate, leaked ones.
    db = Couch(db_name)
    stored_records = db.query(selector)
    if stored_records:
        logger.info('Applying feedback to doc id {} in table {}.'.format(
            selector['_id'], db_name))
        record = stored_records[0]
        record['vector']['label'] = label
        db.update(selector, 'vector', record['vector'])
        db.move_doc(selector, DATABASE_LABELED_DATA)
        logger.info(
            'Applying feedback to doc id {} in table {} completed.'.format(
                selector['_id'], db_name))
    db.close()
def test_delete(self):
    """delete() should remove every doc matching the selector."""
    obj = {"abc": "1234", "def": "4567"}
    selector = {"abc": "1234"}
    db = Couch('test')
    db.distinct_insert(obj)
    query_result = db.query(selector)
    self.assertTrue(len(query_result) > 0)
    db.delete(selector)
    query_result = db.query(selector)
    db.close()  # connection was previously leaked
    self.assertEqual(0, len(query_result))
def test_distinct_insert2(self):
    """Repeated distinct_insert of an identical doc must not duplicate it."""
    conn = Couch("test")
    test_doc = {"adsf": {"bbb": "fdsa", "aand": "ssss"}}
    for _ in range(3):
        conn.distinct_insert(test_doc)
    query_result = conn.query(test_doc)
    conn.close()
    self.assertEqual(1, len(query_result))
def test_database_insert_and_partial_query(self):
    """A partial selector should match docs containing that sub-structure."""
    conn = Couch("test")
    test_doc = {"adsf": {"bbb": "fdsa", "aand": "ssss"}}
    conn.insert(test_doc)
    # BUG fix: the selector key was misspelled "asdf" (doc uses "adsf"),
    # so the query matched nothing and the loop below never executed —
    # the test passed vacuously.
    selector = {"adsf": {"bbb": "fdsa"}}
    res = conn.query(selector)
    conn.close()
    self.assertTrue(len(res) > 0)
    for item in res:
        self.assertEqual(item, test_doc)
def test_database_insert_and_query(self):
    """insert() + query() round-trip with an exact-match selector."""
    conn = Couch("test")
    test_doc = {"abc": "def"}
    conn.insert(test_doc)
    selector = {"abc": "def"}
    res = conn.query(selector)
    conn.close()
    # Guard against a vacuous pass when the query returns nothing.
    self.assertTrue(len(res) > 0)
    for item in res:
        self.assertEqual(item['abc'], 'def')
def query_existing_similarity_in_db(account1, account2):
    """Look up a previously computed similarity for an account pair.

    Checks the labeled-data table first, then data awaiting feedback,
    trying both orderings of the pair.

    Returns:
        The matching docs (floats restored), or [] when nothing is stored.
    """
    database_order = [DATABASE_LABELED_DATA, DATABASE_DATA_AWAIT_FEEDBACK]
    account1 = __format_account_query(account1)
    account2 = __format_account_query(account2)
    selectors = [{
        'platform1': account1['platform'],
        'platform2': account2['platform'],
        'username1': account1['account'],
        'username2': account2['account']
    }, {
        'platform1': account2['platform'],
        'platform2': account1['platform'],
        'username1': account2['account'],
        'username2': account1['account']
    }]
    for db_name in database_order:
        # One connection per database; previously a fresh connection was
        # opened per selector and never closed.
        database = Couch(db_name)
        try:
            for selector in selectors:
                query_res = database.query_latest_change(selector)
                if len(query_res) > 0:
                    return [_restore_float(x) for x in query_res]
        finally:
            database.close()
    return []
def login_account():
    """Validate platform credentials by performing a real login.

    On a successful login the submitted credential record is stored in the
    credential database. Responds with {'result': <bool>}.
    """
    data = json.loads(request.get_data().decode('utf-8'))
    platform = data['platform']
    username = data['username']
    password = decrypt(data['password'])
    # NOTE(review): this rejects only when BOTH fields are empty; a single
    # empty field still attempts a login — confirm that is intended.
    if len(username) == 0 and len(password) == 0:
        return make_response({'result': False})
    if platform == 'Instagram':
        instance = InsUtilsWithLogin(displayed=False)
    elif platform == 'Twitter':
        instance = TwiUtilsWithLogin(displayed=False)
    else:
        instance = None
    if instance is None:
        return make_response({'result': False})
    instance.set_account((username, password))
    res = instance.login()
    if res:
        database = Couch(DATABASE_CREDENTIAL)
        database.insert(data)
        database.close()
    return make_response({'result': res})
def batch_feedback():
    """Apply every queued feedback record, then remove it from the queue."""
    # Single reused connection; previously a fresh Couch was opened per
    # iteration (plus one for the initial query) and never closed.
    feedback_db = Couch(DATABASE_FEEDBACK)
    feedback_await_batch = feedback_db.query_all()
    for feedback in feedback_await_batch:
        apply_feedback(feedback)
        # BUG fix: the feedback record's own id lives in '_id'; the old
        # selector {'doc_id': feedback['_id']} matched on the wrong field,
        # so processed feedback docs were never deleted from the queue.
        feedback_db.delete({'_id': feedback['_id']})
    feedback_db.close()
def generate_model(mode, cross_features=False):
    """Train and evaluate a similarity model from labeled data.

    Args:
        mode: REALTIME_MODE or batch mode flag; passed through to feature
            generation.
        cross_features: whether to produce crossed features.

    Returns:
        The trained keras model.
    """
    logger.info('Start generating model in {} mode.'.format(
        'REALTIME' if mode == REALTIME_MODE else 'BATCH'))
    logger.info('Production of features {}.'.format(
        'enabled' if cross_features else 'disabled'))
    db = Couch(DATABASE_LABELED_DATA)
    items = db.query_all()
    db.close()  # connection was previously leaked
    items = [x for x in items if 'label' in x['vector'].keys()]
    logger.info('Retrieved {} labelled data from the database.'.format(
        len(items)))
    feature_vectors = generate_feature_vectors(items, mode, cross_features)
    dataset = generate_dataset(feature_vectors, mode, cross_features)
    # 80/20 train/test split with a fixed seed for reproducibility.
    train_dataset = dataset.sample(frac=0.8, random_state=0)
    test_dataset = dataset.drop(train_dataset.index)
    train_stats = train_dataset.describe()
    train_stats.pop("label")
    train_stats = train_stats.transpose()
    export_stats(train_stats, mode, cross_features)
    logger.info('Exported training stats.')
    train_labels = train_dataset.pop('label')
    test_labels = test_dataset.pop('label')
    normed_train_data = norm(train_dataset, train_stats)
    normed_test_data = norm(test_dataset, train_stats)
    model = build_model(train_dataset)
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    logger.info('Training...')
    model.fit(normed_train_data, train_labels, epochs=EPOCHS,
              validation_split=0.2, verbose=0,
              callbacks=[early_stop, PrintDot()])
    print('')
    loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=0)
    logger.info("Evaluation of model:")
    logger.info("loss: {:5.2f}, mae: {:5.2f}, mse: {:5.2f}".format(
        loss, mae, mse))
    test_predictions = model.predict(normed_test_data).flatten()
    pred = [1.0 if x >= 0.5 else 0.0 for x in test_predictions]
    res = list(zip(test_labels, pred))
    # BUG fix: precision was previously computed from the TPR/FPR *rates*
    # (tp/(tp+fp) with tp=TP/P and fp=FP/N), which is only correct when the
    # test set is balanced. Compute precision/recall from raw counts, and
    # guard the divisions so a one-class test set cannot raise
    # ZeroDivisionError.
    tp = sum(1 for actual, p in res if actual == 1 and p == 1)
    fp = sum(1 for actual, p in res if actual == 0 and p == 1)
    fn = sum(1 for actual, p in res if actual == 1 and p == 0)
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if (precision + recall) > 0 else 0.0)
    logger.info(
        "Precision: {:5.4f}, Recall: {:5.4f}, F1-score: {:5.4f}".format(
            precision, recall, f1))
    return model
def test_query_latest(self):
    """query_latest_change should return exactly one doc for this selector."""
    conn = Couch("test")
    selector = {"abc": "def"}
    latest = conn.query_latest_change(selector)
    conn.close()
    self.assertEqual(1, len(latest))