Ejemplo n.º 1
0
    def log_files_directory(self):
        """Return the path of the log files directory.

        The path is ``LOG_FILES_DIRECTORY``, prefixed with
        ``self.root_directory`` and a '/' when a root directory is set.
        The path is handed to ``file_utils.make_directory`` before it is
        returned (presumably so the directory exists -- confirm against
        file_utils).
        """
        if self.root_directory is None:
            path = LOG_FILES_DIRECTORY
        else:
            path = self.root_directory + '/' + LOG_FILES_DIRECTORY

        file_utils.make_directory(path)
        return path
Ejemplo n.º 2
0
    def other_files_directory(self):
        """Return the path of the "other files" directory.

        The path is ``OTHER_FILES_DIRECTORY``, prefixed with
        ``self.root_directory`` and a '/' when a root directory is set.
        The path is handed to ``file_utils.make_directory`` before it is
        returned (presumably so the directory exists -- confirm against
        file_utils).
        """
        if self.root_directory is None:
            path = OTHER_FILES_DIRECTORY
        else:
            path = self.root_directory + '/' + OTHER_FILES_DIRECTORY

        file_utils.make_directory(path)
        return path
Ejemplo n.º 3
0
    def source_files_directory(self):
        """Return the path of the source files directory.

        The path is ``SOURCE_FILES_DIRECTORY``, prefixed with
        ``self.root_directory`` and a '/' when a root directory is set.
        The path is handed to ``file_utils.make_directory`` before it is
        returned (presumably so the directory exists -- confirm against
        file_utils).
        """
        if self.root_directory is None:
            path = SOURCE_FILES_DIRECTORY
        else:
            path = self.root_directory + '/' + SOURCE_FILES_DIRECTORY

        file_utils.make_directory(path)
        return path
Ejemplo n.º 4
0
    def generated_files_directory(self):
        """Return the path of the generated files directory.

        The path is ``GENERATED_FILES_DIRECTORY``, prefixed with
        ``self.root_directory`` and a '/' when a root directory is set.
        The path is handed to ``file_utils.make_directory`` before it is
        returned (presumably so the directory exists -- confirm against
        file_utils).
        """
        if self.root_directory is None:
            path = GENERATED_FILES_DIRECTORY
        else:
            path = self.root_directory + '/' + GENERATED_FILES_DIRECTORY

        file_utils.make_directory(path)
        return path
Ejemplo n.º 5
0
    def bulk_update_response_directory(self,
                                       data_source_batch_name,
                                       data_source_name=None):
        """Return the bulk update response directory for a batch.

        The path is ``BULK_UPDATE_RESPONSE_DIRECTORY`` appended (with '/')
        to the result of ``self.data_source_batch_directory`` for the given
        batch and data source names. The path is handed to
        ``file_utils.make_directory`` before it is returned.

        :param data_source_batch_name: forwarded to
            ``data_source_batch_directory``.
        :param data_source_name: forwarded to
            ``data_source_batch_directory``; defaults to None.
        :return: the resulting path string.
        """
        batch_root = self.data_source_batch_directory(
            data_source_batch_name, data_source_name)
        path = batch_root + '/' + BULK_UPDATE_RESPONSE_DIRECTORY

        file_utils.make_directory(path)
        return path
Ejemplo n.º 6
0
    def loaded_docs_directory(self,
                              data_source_batch_name,
                              data_source_name=None):
        """Return the loaded docs directory for a batch.

        The path is ``LOADED_DOCS_DIRECTORY`` appended (with '/') to the
        result of ``self.data_source_batch_directory`` for the given batch
        and data source names. The path is handed to
        ``file_utils.make_directory`` before it is returned.

        :param data_source_batch_name: forwarded to
            ``data_source_batch_directory``.
        :param data_source_name: forwarded to
            ``data_source_batch_directory``; defaults to None.
        :return: the resulting path string.
        """
        batch_root = self.data_source_batch_directory(
            data_source_batch_name, data_source_name)
        path = batch_root + '/' + LOADED_DOCS_DIRECTORY

        file_utils.make_directory(path)
        return path
Ejemplo n.º 7
0
    def failed_docs_directory(self,
                              data_source_batch_name,
                              data_source_name=None):
        """Return the failed docs directory for a batch.

        The path is ``FAILED_DOCS_DIRECTORY`` appended (with '/') to the
        result of ``self.data_source_batch_directory`` for the given batch
        and data source names. The path is handed to
        ``file_utils.make_directory`` before it is returned.

        :param data_source_batch_name: forwarded to
            ``data_source_batch_directory``.
        :param data_source_name: forwarded to
            ``data_source_batch_directory``; defaults to None.
        :return: the resulting path string.
        """
        batch_root = self.data_source_batch_directory(
            data_source_batch_name, data_source_name)
        path = batch_root + '/' + FAILED_DOCS_DIRECTORY

        file_utils.make_directory(path)
        return path
Ejemplo n.º 8
0
    def data_source_directory(self, data_source_name=None):
        """Return the directory for a data source.

        Starts from ``self.generated_files_directory()`` and appends a
        sub-directory named after the data source. An explicit
        ``data_source_name`` argument takes precedence; otherwise
        ``self.data_source_name`` is used. When both are None the generated
        files directory itself is returned. The path is handed to
        ``file_utils.make_directory`` before it is returned.

        :param data_source_name: optional name overriding
            ``self.data_source_name``.
        :return: the resulting path string.
        """
        base = self.generated_files_directory()

        # Explicit argument wins; the instance attribute is only consulted
        # when no argument was supplied.
        name = data_source_name
        if name is None:
            name = self.data_source_name

        if name is None:
            path = base
        else:
            path = base + '/' + name

        file_utils.make_directory(path)
        return path
Ejemplo n.º 9
0
    def data_source_batch_directory(self,
                                    data_source_batch_name,
                                    data_source_name=None):
        """Return the directory for one batch of a data source.

        Starts from ``self.data_source_directory(data_source_name)`` and,
        when ``data_source_batch_name`` is not None, appends it as a
        sub-directory. The path is handed to ``file_utils.make_directory``
        before it is returned.

        :param data_source_batch_name: batch sub-directory name, or None to
            get the data source directory itself.
        :param data_source_name: forwarded to ``data_source_directory``;
            defaults to None.
        :return: the resulting path string.
        """
        parent = self.data_source_directory(data_source_name)
        if data_source_batch_name is None:
            path = parent
        else:
            path = parent + '/' + data_source_batch_name

        file_utils.make_directory(path)
        return path
Ejemplo n.º 10
0
    def process_relations_rows(self, data_rows, data_source_batch_name,
                               source_index_id):
        """Load relationship rows in batches, skipping already-loaded ids.

        Steps, in order:

        1. Resolve the batch directory via ``self.load_config`` and make a
           per-source sub-directory named ``source_index_id`` inside it.
        2. In retry-failed-docs or normal-load mode, fetch the ids already
           loaded for that sub-directory via ``self.get_loaded_ids``; in any
           other mode nothing is treated as loaded.
        3. Log the source index, the number of loaded ids and the number of
           ids still to load.
        4. In any mode other than normal load, rename the batch directory
           to ``old_<batch name>_<millisecond timestamp>`` inside the data
           source directory.
        5. Pop the remaining rows out of ``data_rows`` and hand them to
           ``self.start_relationship_load_process`` in chunks of
           ``self.load_config.data_loader_batch_size``, then call
           ``self.join_processes()``.

        :param data_rows: mapping of id -> row data. Rows that get loaded
            are removed from this mapping (it is mutated in place).
        :param data_source_batch_name: name of the batch being processed.
        :param source_index_id: identifier used as the per-source
            sub-directory name; must be a string (it is concatenated into a
            path).
        """
        data_source_directory = self.load_config.data_source_directory()
        data_source_batch_directory = self.load_config.data_source_batch_directory(
            data_source_batch_name)
        data_source_batch_directory_for_source = data_source_batch_directory + '/' + source_index_id
        file_utils.make_directory(data_source_batch_directory_for_source)

        if self.mode in (DataProcessor.MODE_RETRY_FAILED_DOCS,
                         DataProcessor.MODE_NORMAL_LOAD):
            loaded_ids = self.get_loaded_ids(
                data_source_batch_directory_for_source)
        else:
            loaded_ids = {}

        # Keep only the ids that have not been loaded yet.
        filtered_ids = [_id for _id in data_rows if _id not in loaded_ids]

        self.load_config.log(LOG_LEVEL_INFO, 'source index', source_index_id)
        self.load_config.log(LOG_LEVEL_INFO, 'loaded ids', len(loaded_ids))
        self.load_config.log(LOG_LEVEL_INFO, 'ids to load', len(filtered_ids))

        # Compare by value (as the mode check above does); an identity test
        # (`is not`) is only reliable for singletons, not for string/int
        # constants.
        if self.mode != DataProcessor.MODE_NORMAL_LOAD:
            batch_id = str(int(round(time.time() * 1000)))
            old_data_source_batch_directory = data_source_directory + '/' + 'old_' + data_source_batch_name + '_' + batch_id
            # NOTE(review): this moves the per-source sub-directory created
            # above along with the batch directory -- presumably the load
            # processes recreate what they need; confirm.
            os.rename(data_source_batch_directory,
                      old_data_source_batch_directory)

        batch = {}
        for count, _id in enumerate(filtered_ids, 1):
            # Popping releases each row from data_rows as it is queued.
            batch[_id] = data_rows.pop(_id, None)

            if count % self.load_config.data_loader_batch_size == 0:
                self.start_relationship_load_process(batch,
                                                     data_source_batch_name,
                                                     source_index_id)
                batch = {}

        # Flush the final, partially filled batch.
        if batch:
            self.start_relationship_load_process(batch, data_source_batch_name,
                                                 source_index_id)

        self.join_processes()
Ejemplo n.º 11
0
    def rename_failed_ids_directory(self):
        """Move the current failed docs directory aside and start a new one.

        The directory returned by ``self.load_config.failed_docs_directory``
        for ``self.data_source_batch_name`` is renamed, within the same
        parent directory, to ``old_<millisecond timestamp>_<original name>``.
        The original path is then handed to ``file_utils.make_directory``
        again.
        """
        current_dir = self.load_config.failed_docs_directory(
            self.data_source_batch_name)
        parent_path, dir_name = os.path.split(current_dir)

        # Millisecond timestamp keeps successive archived names distinct.
        timestamp = str(int(round(time.time() * 1000)))
        archived_dir = parent_path + '/' + 'old_' + timestamp + '_' + dir_name
        os.rename(current_dir, archived_dir)

        file_utils.make_directory(current_dir)