def get_load_config(self):
    """Build a LoadConfig populated from this object's settings.

    The root directory, server details and index/type target are copied
    from the attributes of the same name on ``self``. The process count
    is taken from ``psutil.cpu_count()``, and the data mapper, data
    extractor and ``max_memory_percent`` come from the corresponding
    ``get_*`` methods on ``self``.

    Returns:
        The newly created, fully populated ``LoadConfig``.
    """
    config = LoadConfig()

    config.root_directory = self.root_directory
    config.process_count = psutil.cpu_count()

    # These settings are mirrored one-to-one from the instance; the tuple
    # order is the order in which they are assigned.
    mirrored_attributes = (
        'server',
        'server_username',
        'server_password',
        'index',
        'type',
    )
    for attribute in mirrored_attributes:
        setattr(config, attribute, getattr(self, attribute))

    # Values that are produced by helper methods rather than read directly.
    config.data_mapper = self.get_data_mapper()
    config.data_extractor = self.get_data_extractor()
    config.max_memory_percent = self.get_max_memory_percent()

    return config
def create_load_config():
    """Create the LoadConfig used by this module.

    All values come from module-level constants (``ROOT_DIRECTORY``,
    ``LOCAL_SERVER``, ``INDEX``, ``TYPE``), from ``psutil.cpu_count()``
    for the process count, and from fresh ``IRDBDataMapper`` /
    ``IRDBDataExtractor`` instances. ``max_memory_percent`` is fixed at 75.

    Returns:
        The populated ``LoadConfig``.
    """
    config = LoadConfig()

    config.root_directory = ROOT_DIRECTORY
    # config.data_source_name = 'extended_relations'
    config.process_count = psutil.cpu_count()

    # Target server, index and document type.
    config.server = LOCAL_SERVER
    config.index = INDEX
    config.type = TYPE

    # Components, created in the same order as they are attached.
    mapper = IRDBDataMapper()
    config.data_mapper = mapper
    extractor = IRDBDataExtractor()
    config.data_extractor = extractor

    # config.data_source_name = file_name.split('.')[0]
    config.max_memory_percent = 75

    return config
def get_load_config():
    """Assemble the LoadConfig for this module from its constants.

    Server, index/type, process count and the four batch-size settings
    are read from module-level constants. The extractor and mapper are
    fresh ``CTDataExtractor`` / ``CTDataMapper`` instances, and
    ``max_memory_percent`` is fixed at 80.

    Returns:
        The populated ``LoadConfig``.
    """
    config = LoadConfig()

    # Where to load from and which server/index/type to load into.
    config.root_directory = ROOT_DIRECTORY
    config.server = SERVER
    config.index = INDEX
    config.type = TYPE

    # Process count and batch sizes.
    config.process_count = PROCESS_COUNT
    config.bulk_data_size = BULK_DATA_SIZE
    config.data_loader_batch_size = DATA_LOADER_BATCH_SIZE
    config.data_source_batch_size = DATA_SOURCE_BATCH_SIZE
    config.doc_fetch_batch_size = DOC_FETCH_BATCH_SIZE
    # config.log_level = LOG_LEVEL_TRACE

    # Extractor first, then mapper.
    config.data_extractor = CTDataExtractor()
    config.data_mapper = CTDataMapper()

    # config.data_source_name = file_name.split('.')[0]
    config.max_memory_percent = 80

    return config
# NOTE(review): the statement on the next line is the tail of a function whose
# `def` header lies outside this view, so its nesting cannot be recovered here
# and it is kept exactly as found. It drops the item's 'removed_citations'
# from `citations` via a set difference (so ordering and duplicates are not
# preserved), and `citations` is returned.
if 'removed_citations' in update_history_item: removed_citations = update_history_item[ 'removed_citations'] citations = list(set(citations) - set(removed_citations)) return citations

# Script section: build a LoadConfig for the "pubmed2019" index on the
# localhost server, then run FixCitations with it.
load_config = LoadConfig()
load_config.root_directory = '/data/data_loading/pubmed_2019/pubmed2019/fix_citations'
# The CPU-count-based process count is disabled; an explicit value of 4 is
# assigned further down instead.
# load_config.process_count = psutil.cpu_count()
load_config.server = 'http://localhost:9200'
# Credentials are empty strings.
# NOTE(review): presumably the local server needs no authentication — confirm.
load_config.server_username = ''
load_config.server_password = ''
load_config.index = "pubmed2019"
load_config.type = "article"
load_config.data_mapper = PubmedDataMapper()
load_config.data_extractor = PubmedDataExtractor()
load_config.max_memory_percent = 75
load_config.source = ""
load_config.append_relations = False
load_config.process_count = 4

# Hand the finished configuration to FixCitations and start it.
fix_citations = FixCitations(load_config)
fix_citations.run()