return "{:.2f} seconds".format(seconds)


from collections import Counter

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

from dotenv import load_dotenv
# Load settings from the local 'admin.env' file at import time.
# NOTE(review): presumably this has to run before get_db_connection() so
# that the connection settings are available -- confirm in mysql_connect.
load_dotenv('admin.env')

# from db_connect_mag import Session, Paper, PaperAuthorAffiliation
# from db_connect_mag import db
from mysql_connect import get_db_connection
# Module-level connection handle obtained for 'mag_20180329'; created as a
# side effect of importing this module.
db = get_db_connection('mag_20180329')

import logging
# Configure the root logger at import time: INFO and above, with a
# time-of-day stamp plus logger name and line number on every record.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%H:%M:%S",
    format='%(asctime)s %(name)s.%(lineno)d %(levelname)s : %(message)s',
)
# This module's logger is created as a child of the '__main__' logger
# (instead of a plain logging.getLogger(__name__)).
logger = logging.getLogger('__main__').getChild(__name__)

from get_papers_2_degrees_out import _get_papers_and_save_from_id_list


def get_id_list_from_file(fname, header=True):
    id_list = []
    with open(fname, 'r') as f:
Example #2
0
logger = logging.getLogger('__main__').getChild(__name__)

import pandas as pd
import numpy as np

from dotenv import load_dotenv

# Load settings from 'admin.env'. The return value of load_dotenv is used as
# a success flag so that a failed load is logged instead of silently ignored.
logger.debug("loading dotenv...")
success_load_dotenv = load_dotenv('admin.env')
if success_load_dotenv:
    logger.debug("dotenv loaded successfully.")
else:
    # Logger.warn is a deprecated alias; Logger.warning is the supported name.
    logger.warning("failed to load dotenv")
from mysql_connect import get_db_connection

# Module-level connection handle obtained for 'mag_2017-10'; created as a
# side effect of importing this module.
db_mag = get_db_connection('mag_2017-10')

from utils.autoreview_utils import prepare_directory

# Relative path of the review-paper dataset (tab-separated, per its .tsv extension).
REVIEW_DATA = 'data/wos_reviews_mag_match_by_doi.tsv'
# File name of a companion script; going by the constant's name it collects
# the data and runs the experiments -- NOTE(review): not used in the visible
# code, confirm how it is invoked.
COLLECT_DATA_AND_RUN_EXPERIMENTS_SCRIPT = 'get_haystack_then_run_experiments.py'


def load_review_data(fname):
    """Load the review-paper table from a tab-separated file.

    Rows missing either ``mag_date`` or ``multiple_match_flag`` are dropped,
    ``mag_date`` is parsed into datetimes and ``multiple_match_flag`` is
    cast to ``int``.

    :param fname: path of the tab-separated file to read
    :returns: the cleaned ``pandas.DataFrame``
    """
    # Read the file the caller asked for. Previously ``fname`` was ignored
    # and the module-level REVIEW_DATA path was always loaded.
    df = pd.read_table(fname)
    # Both columns are required below; NaN cannot be cast to int.
    df.dropna(subset=['mag_date', 'multiple_match_flag'], inplace=True)
    # Bracket assignment always targets the column, never an attribute.
    df['mag_date'] = pd.to_datetime(df['mag_date'])
    df['multiple_match_flag'] = df['multiple_match_flag'].astype(int)
    return df

Example #3
0
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer
from sklearn.metrics import classification_report

from utils.autoreview_utils import ItemSelector, DataFrameColumnTransformer, ClusterTransformer, AverageTfidfCosSimTransformer
# from features import avgDist_EF, avgDist_EF_avgTitleTfidfCosSim

from dotenv import load_dotenv
# Load settings from 'admin.env'. The return value of load_dotenv is used as
# a success flag so that a failed load is logged instead of silently ignored.
logger.debug("loading dotenv...")
success_load_dotenv = load_dotenv('admin.env')
if success_load_dotenv:
    logger.debug("dotenv loaded successfully.")
else:
    # Logger.warn is a deprecated alias; Logger.warning is the supported name.
    logger.warning("failed to load dotenv")
from mysql_connect import get_db_connection
# Module-level connection handle obtained for 'jp_autoreview'; created as a
# side effect of importing this module.
db = get_db_connection('jp_autoreview')

from sqlalchemy.orm import sessionmaker
from models.jp_autoreview import PipelineTest

Session = sessionmaker(bind=db.engine)


class PipelineExperiment(object):
    """configure and run a pipeline for a review paper classifier"""
    def __init__(self,
                 clf,
                 transformer_list,
                 seed_papers=None,
                 random_state=999):
        """
def main(args):
    db = get_db_connection('jp_autoreview')
    Base.metadata.create_all(bind=db.engine)