return "{:.2f} seconds".format(seconds) from collections import Counter import pandas as pd import numpy as np from sklearn.model_selection import train_test_split from dotenv import load_dotenv load_dotenv('admin.env') # from db_connect_mag import Session, Paper, PaperAuthorAffiliation # from db_connect_mag import db from mysql_connect import get_db_connection db = get_db_connection('mag_20180329') import logging logging.basicConfig( format='%(asctime)s %(name)s.%(lineno)d %(levelname)s : %(message)s', datefmt="%H:%M:%S", level=logging.INFO) # logger = logging.getLogger(__name__) logger = logging.getLogger('__main__').getChild(__name__) from get_papers_2_degrees_out import _get_papers_and_save_from_id_list def get_id_list_from_file(fname, header=True): id_list = [] with open(fname, 'r') as f:
logger = logging.getLogger('__main__').getChild(__name__)

import pandas as pd
import numpy as np

from dotenv import load_dotenv
logger.debug("loading dotenv...")
success_load_dotenv = load_dotenv('admin.env')
if success_load_dotenv:
    logger.debug("dotenv loaded successfully.")
else:
    # Logger.warn is a deprecated alias; Logger.warning is the supported name.
    logger.warning("failed to load dotenv")

from mysql_connect import get_db_connection
db_mag = get_db_connection('mag_2017-10')

from utils.autoreview_utils import prepare_directory

REVIEW_DATA = 'data/wos_reviews_mag_match_by_doi.tsv'
COLLECT_DATA_AND_RUN_EXPERIMENTS_SCRIPT = 'get_haystack_then_run_experiments.py'


def load_review_data(fname):
    """Load the review-paper table from a tab-delimited file.

    Rows with a missing ``mag_date`` or ``multiple_match_flag`` are dropped,
    ``mag_date`` is parsed to datetimes and ``multiple_match_flag`` is cast
    to ``int``.

    Args:
        fname: Path of the file to read. Earlier versions ignored this
            argument and always read ``REVIEW_DATA``; passing ``None`` keeps
            that behaviour.

    Returns:
        pandas.DataFrame: the cleaned table.
    """
    # Honour the caller's path instead of silently reading the module constant.
    path = REVIEW_DATA if fname is None else fname
    df = pd.read_table(path)
    df.dropna(subset=['mag_date', 'multiple_match_flag'], inplace=True)
    df['mag_date'] = pd.to_datetime(df['mag_date'])
    # NaNs were dropped above, so the integer cast cannot hit missing values.
    df['multiple_match_flag'] = df['multiple_match_flag'].astype(int)
    return df
from sklearn.feature_extraction import DictVectorizer from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer from sklearn.metrics import classification_report from utils.autoreview_utils import ItemSelector, DataFrameColumnTransformer, ClusterTransformer, AverageTfidfCosSimTransformer # from features import avgDist_EF, avgDist_EF_avgTitleTfidfCosSim from dotenv import load_dotenv logger.debug("loading dotenv...") success_load_dotenv = load_dotenv('admin.env') if success_load_dotenv: logger.debug("dotenv loaded successfully.") else: logger.warn("failed to load dotenv") from mysql_connect import get_db_connection db = get_db_connection('jp_autoreview') from sqlalchemy.orm import sessionmaker from models.jp_autoreview import PipelineTest Session = sessionmaker(bind=db.engine) class PipelineExperiment(object): """configure and run a pipeline for a review paper classifier""" def __init__(self, clf, transformer_list, seed_papers=None, random_state=999): """
def main(args):
    """Run ``Base.metadata.create_all`` against the 'jp_autoreview' database.

    Args:
        args: Accepted for the script entry-point signature; not used here.
    """
    engine = get_db_connection('jp_autoreview').engine
    # Bind the metadata's create_all call to the engine of that connection.
    Base.metadata.create_all(bind=engine)