예제 #1
0
    def get_load_config(self):
        """Build a ``LoadConfig`` populated from this object's settings.

        The root directory, server connection fields, index and type are
        copied from the same-named attributes on ``self``. The process count
        is taken from ``psutil.cpu_count()``. The data mapper, data extractor
        and memory limit come from the matching ``get_*`` methods on ``self``.

        Returns:
            The newly created and populated ``LoadConfig`` instance.
        """
        config = LoadConfig()
        config.root_directory = self.root_directory
        config.process_count = psutil.cpu_count()

        # These fields mirror attributes of the same name on this object;
        # they are copied one by one in the listed order.
        mirrored_fields = (
            'server',
            'server_username',
            'server_password',
            'index',
            'type',
        )
        for field_name in mirrored_fields:
            setattr(config, field_name, getattr(self, field_name))

        config.data_mapper = self.get_data_mapper()
        config.data_extractor = self.get_data_extractor()
        config.max_memory_percent = self.get_max_memory_percent()

        return config
예제 #2
0
def create_load_config():
    """Create a ``LoadConfig`` wired to the IRDB data mapper and extractor.

    The root directory, server, index and type are taken from the constants
    ``ROOT_DIRECTORY``, ``LOCAL_SERVER``, ``INDEX`` and ``TYPE``. The process
    count is whatever ``psutil.cpu_count()`` reports, and the memory limit is
    fixed at 75 percent. ``data_source_name`` is not assigned here.

    Returns:
        The newly created and populated ``LoadConfig`` instance.
    """
    config = LoadConfig()
    config.root_directory = ROOT_DIRECTORY

    # One worker process per CPU reported by psutil.
    cpu_total = psutil.cpu_count()
    config.process_count = cpu_total

    # Destination of the load.
    config.server = LOCAL_SERVER
    config.index = INDEX
    config.type = TYPE

    # IRDB-specific mapping and extraction components.
    config.data_mapper = IRDBDataMapper()
    config.data_extractor = IRDBDataExtractor()

    config.max_memory_percent = 75

    return config
예제 #3
0
def get_load_config():
    """Build a ``LoadConfig`` for the CT extractor and mapper.

    Directory, server, index, type, process count and the batch-size
    settings are taken from constants defined outside this function. The
    memory limit is fixed at 80 percent. ``log_level`` and
    ``data_source_name`` are not assigned here.

    Returns:
        The newly created and populated ``LoadConfig`` instance.
    """
    config = LoadConfig()

    # Constant-valued settings, applied to the config in exactly this order.
    constant_settings = (
        ('root_directory', ROOT_DIRECTORY),
        ('server', SERVER),
        ('index', INDEX),
        ('type', TYPE),
        ('process_count', PROCESS_COUNT),
        ('bulk_data_size', BULK_DATA_SIZE),
        ('data_loader_batch_size', DATA_LOADER_BATCH_SIZE),
        ('data_source_batch_size', DATA_SOURCE_BATCH_SIZE),
        ('doc_fetch_batch_size', DOC_FETCH_BATCH_SIZE),
    )
    for attribute, value in constant_settings:
        setattr(config, attribute, value)

    # CT-specific extraction and mapping components.
    config.data_extractor = CTDataExtractor()
    config.data_mapper = CTDataMapper()

    config.max_memory_percent = 80

    return config
예제 #4
0
                if 'removed_citations' in update_history_item:
                    removed_citations = update_history_item[
                        'removed_citations']

                    citations = list(set(citations) - set(removed_citations))

        return citations


# Script section: build a LoadConfig for the "pubmed2019" index, hand it to
# FixCitations and run the job. Everything here executes at module level.
load_config = LoadConfig()
load_config.root_directory = '/data/data_loading/pubmed_2019/pubmed2019/fix_citations'
# NOTE: process_count is set to a fixed value further down instead of
# psutil.cpu_count().

# Target server on localhost; username and password are empty strings.
load_config.server = 'http://localhost:9200'
load_config.server_username = ''
load_config.server_password = ''
load_config.index = "pubmed2019"
load_config.type = "article"

# Pubmed-specific mapper and extractor, with max_memory_percent set to 75.
load_config.data_mapper = PubmedDataMapper()
load_config.data_extractor = PubmedDataExtractor()
load_config.max_memory_percent = 75

# Extra attributes set for this job; their meaning is defined by the consumer
# of the config (presumably FixCitations -- TODO confirm).
load_config.source = ""
load_config.append_relations = False

# Fixed process count of 4 for this run.
load_config.process_count = 4

# Create the job from the config and start it immediately.
fix_citations = FixCitations(load_config)
fix_citations.run()