def output_results(df, columns):
    """
    Write the results to the next results CSV file, then copy that file
    to the upload location.

    Both paths are obtained from Config; each step is logged before it runs.

    :param df: pandas.DataFrame
    :param columns: columns with paper_id and results
    :return: None
    """
    log = logging.getLogger(__name__)
    csv_path = Config.get_next_results_file_path()
    upload_target = Config.get_results_upload_path()

    # Step 1: persist the data frame as CSV.
    log.info('Storing results in a CSV %s', csv_path)
    csv_store = CsvDatastore()
    csv_store.store_results(df, csv_path, columns)

    # Step 2: duplicate the freshly written CSV at the upload path.
    log.info('Copying results to the upload file %s', upload_target)
    shutil.copyfile(csv_path, upload_target)
def store_authors(self):
    """
    Hand the Authors.txt data file to Hdf5Datastore.store_table under the
    table name 'authors_table' and log the value it returns.

    :return: None
    """
    authors_file = 'Authors.txt'
    authors_path = Config.get_path_to_data_file(authors_file)
    # Log the resolved path (not just the bare file name) so the message
    # matches the other store_* methods and shows which file is read.
    self.logger.info('Reading authors from %s', authors_path)
    rows = Hdf5Datastore().store_table('authors_table', AuthorsHdf5,
                                       authors_path, AuthorsCsv)
    self.logger.info('Rows exported: %s', rows)
def store_papers(self):
    """
    Hand the Papers.txt data file to Hdf5Datastore.store_table under the
    table name 'papers_table' and log the value it returns.

    :return: None
    """
    source_path = Config.get_path_to_data_file('Papers.txt')
    self.logger.info('Reading papers from %s', source_path)

    hdf5_store = Hdf5Datastore()
    exported = hdf5_store.store_table(
        'papers_table', PapersHdf5, source_path, PapersCsv)
    self.logger.info('Rows exported: %s', exported)
def load_paper_journal_matrix(self, papers, journals):
    """
    Build the paper/journal relation matrix from the Papers.txt data file.

    :param papers: dictionary of {id: index}
    :param journals: dictionary of {id: index}
    :return: whatever CsvDatastore.csv_to_relation_matrix returns for the
        paper_id and journal_id columns
    """
    papers_path = Config.get_path_to_data_file('Papers.txt')
    csv_store = CsvDatastore()
    paper_col = PapersCsv.paper_id.value
    journal_col = PapersCsv.journal_id.value
    return csv_store.csv_to_relation_matrix(
        papers_path, paper_col, papers, journal_col, journals)
def load_paper_field_of_study_matrix(self, papers, fos):
    """
    Build the paper/field-of-study relation matrix from the
    PaperKeywords.txt data file.

    :param papers: dictionary of {id: index}
    :param fos: fields of study, dictionary of {id: index}
    :return: whatever CsvDatastore.csv_to_relation_matrix returns for the
        paper_id and field_id columns
    """
    keywords_path = Config.get_path_to_data_file('PaperKeywords.txt')
    csv_store = CsvDatastore()
    paper_col = PaperKeywordsCsv.paper_id.value
    field_col = PaperKeywordsCsv.field_id.value
    return csv_store.csv_to_relation_matrix(
        keywords_path, paper_col, papers, field_col, fos)
def load_paper_conf_series_matrix(self, papers, conf_series):
    """
    Build the paper/conference-series relation matrix from the Papers.txt
    data file.

    :param papers: dictionary of {id: index}
    :param conf_series: dictionary of {id: index}
    :return: whatever CsvDatastore.csv_to_relation_matrix returns for the
        paper_id and conference_series_id columns
    """
    papers_path = Config.get_path_to_data_file('Papers.txt')
    csv_store = CsvDatastore()
    paper_col = PapersCsv.paper_id.value
    series_col = PapersCsv.conference_series_id.value
    return csv_store.csv_to_relation_matrix(
        papers_path, paper_col, papers, series_col, conf_series)
def store_journals(self):
    """
    Hand the Journals.txt data file to Hdf5Datastore.store_table under the
    table name 'journals_table' and log the value it returns.

    :return: None
    """
    source_path = Config.get_path_to_data_file('Journals.txt')
    self.logger.info('Reading journals from %s', source_path)

    hdf5_store = Hdf5Datastore()
    exported = hdf5_store.store_table(
        'journals_table', JournalsHdf5, source_path, JournalsCsv)
    self.logger.info('Rows exported: %s', exported)
def load_author_sequence_matrix(self, papers, authors):
    """
    Build the paper/author relation matrix from the
    PaperAuthorAffiliations.txt data file, additionally passing the
    author_seq_number column to the matrix builder.

    :param papers: dictionary of {id: index}
    :param authors: dictionary of {id: index}
    :return: whatever CsvDatastore.csv_to_relation_matrix returns
    """
    relations_path = Config.get_path_to_data_file(
        'PaperAuthorAffiliations.txt')
    csv_store = CsvDatastore()
    paper_col = PapAuthAff.paper_id.value
    author_col = PapAuthAff.author_id.value
    # Extra sixth argument; presumably supplies the matrix cell values --
    # confirm against csv_to_relation_matrix.
    seq_col = PapAuthAff.author_seq_number.value
    return csv_store.csv_to_relation_matrix(
        relations_path, paper_col, papers, author_col, authors, seq_col)
def store_affiliations(self):
    """
    Hand the Affiliations.txt data file to Hdf5Datastore.store_table under
    the table name 'affiliations_table' and log the value it returns.

    :return: None
    """
    source_path = Config.get_path_to_data_file('Affiliations.txt')
    self.logger.info('Reading affiliations from %s', source_path)

    hdf5_store = Hdf5Datastore()
    exported = hdf5_store.store_table(
        'affiliations_table', AffiliationsHdf5, source_path, AffiliationsCsv)
    self.logger.info('Rows exported: %s', exported)
def store_conference_series(self):
    """
    Hand the Conferences.txt data file to Hdf5Datastore.store_table under
    the table name 'conference_series_table' and log the value it returns.

    :return: None
    """
    source_path = Config.get_path_to_data_file('Conferences.txt')
    self.logger.info('Reading conference series from %s', source_path)

    hdf5_store = Hdf5Datastore()
    exported = hdf5_store.store_table(
        'conference_series_table', ConferenceSeriesHdf5,
        source_path, ConferenceSeriesCsv)
    self.logger.info('Rows exported: %s', exported)
def load_citation_matrix(self, papers):
    """
    Build adjacency matrix from the list of edges in the
    PaperReferences.txt file.

    The same ``papers`` index mapping is used for both the paper_id and
    the reference_id columns.

    :param papers: dictionary of {id: index}
    :return: scipy.sparse.csr_matrix
    """
    references_path = Config.get_path_to_data_file('PaperReferences.txt')
    csv_store = CsvDatastore()
    citing_col = PapRef.paper_id.value
    cited_col = PapRef.reference_id.value
    return csv_store.csv_to_relation_matrix(
        references_path, citing_col, papers, cited_col, papers)
def load_paper_affiliation_matrix(self, papers, affiliations):
    """
    Build matrix of papers and affiliations from the paper-affiliation
    relations listed in the PaperAuthorAffiliations.txt file.

    :param papers: dictionary of {id: index}
    :param affiliations: dictionary of {id: index}
    :return: scipy.sparse.csr_matrix
    """
    relations_path = Config.get_path_to_data_file(
        'PaperAuthorAffiliations.txt')
    csv_store = CsvDatastore()
    paper_col = PapAuthAff.paper_id.value
    affiliation_col = PapAuthAff.affiliation_id.value
    return csv_store.csv_to_relation_matrix(
        relations_path, paper_col, papers, affiliation_col, affiliations)
def store_fields_of_study(self):
    """
    Hand the FieldsOfStudy.txt data file to Hdf5Datastore.store_table under
    the table name 'fields_of_study_table' and log the value it returns.

    :return: None
    """
    source_path = Config.get_path_to_data_file('FieldsOfStudy.txt')
    self.logger.info('Reading fields of study from %s', source_path)

    hdf5_store = Hdf5Datastore()
    exported = hdf5_store.store_table(
        'fields_of_study_table', FieldsOfStudyHdf5,
        source_path, FieldsOfStudyCsv)
    self.logger.info('Rows exported: %s', exported)
def setup_logging(default_path='logging.json', default_level=logging.DEBUG):
    """
    Setup logging configuration.

    When ``default_path`` exists it is read as JSON, the 'file' handler's
    filename is replaced with the path Config returns for 'debug.log', and
    the result is applied via ``logging.config.dictConfig``. Otherwise
    ``logging.basicConfig`` is called with ``default_level``.

    :param default_path: path to the JSON logging configuration file
    :param default_level: level passed to basicConfig when no file exists
    :return: None
    """
    # Guard clause: no configuration file, fall back to basic logging.
    if not os.path.exists(default_path):
        logging.basicConfig(level=default_level)
        return

    with open(default_path, 'rt') as config_file:
        settings = json.load(config_file)

    # Redirect the file handler to the project's log location.
    log_file = Config.get_path_to_log_file('debug.log')
    settings['handlers']['file']['filename'] = log_file
    logging.config.dictConfig(settings)
def __init__(self, datastore_fname=Config.DATASTORE_FNAME):
    """
    Resolve the HDF5 datastore path and attach a module-named logger.

    :param datastore_fname: file name handed to
        Config.get_path_to_hdf5_file; defaults to Config.DATASTORE_FNAME
    """
    hdf5_path = Config.get_path_to_hdf5_file(datastore_fname)
    self.datastore_path = hdf5_path
    self.logger = logging.getLogger(__name__)
def __init__(self, datastore_fname=Config.DATASTORE_FNAME):
    """
    Resolve the HDF5 datastore path and attach a module-named logger.

    :param datastore_fname: file name handed to
        Config.get_path_to_hdf5_file; defaults to Config.DATASTORE_FNAME
    """
    resolved_path = Config.get_path_to_hdf5_file(datastore_fname)
    self.datastore_path = resolved_path
    self.logger = logging.getLogger(__name__)