def process_ids(self, ids, _index, _type, data_source_batch_name):
    # Process batch ids
    batch_ids = ids.keys()
    self.load_config.log(LOG_LEVEL_INFO, 'Processing batch: ', data_source_batch_name, ',', len(batch_ids), 'docs')

    data_source_batches = self.load_data_source_batches()
    data_source_batches[data_source_batch_name] = 0
    self.save_data_source_batches(data_source_batches)

    failed_ids = self.process_ids_method(
        ids, _index, _type, data_source_batch_name)

    processed_indices = list(batch_ids)

    # Save batch info
    batch_info = {
        'batch_ids': batch_ids,
        'processed_indices': processed_indices,
        'failed_ids': failed_ids
    }

    # print 'Batch ids', len(ids)
    # print 'Saving batch info', batch_info

    data_source_directory = self.load_config.data_source_directory()
    file_utils.save_file(data_source_directory,
                         data_source_batch_name + '.json', batch_info)

def get_progress(self):
    data_source_directory = self.load_config.data_source_directory()
    stats_file_name = self.data_source_stats_file_name()
    if self.data_source_stats is None:
        self.data_source_stats = file_utils.load_file(
            data_source_directory, stats_file_name)

    row_count = 0
    unique_ids = 0
    # if 'row_count' in self.data_source_stats:
    #     row_count = self.data_source_stats['row_count']
    if 'unique_ids' in self.data_source_stats:
        unique_ids = self.data_source_stats['unique_ids']

    docs_loaded = self.get_loaded_doc_count()

    if unique_ids > 0:
        self.load_config.log(LOG_LEVEL_INFO, 'docs loaded', docs_loaded, 'unique_ids', unique_ids)
        progress = (docs_loaded / float(unique_ids)) * 100
        self.data_source_stats['progress'] = progress
        file_utils.save_file(data_source_directory, stats_file_name, self.data_source_stats)
        return progress

    return -1

def save_language_data(self, language, language_data):
    """ TODO Refactoring! """
    # Android resource layout: the default language lives in "values",
    # every other language in "values-<language>".
    dir = self.directory + os.sep + "values"
    if language != self.default_lang:
        dir += "-" + language
    file_utils.save_file(language_data, dir, self.files_regex)

def save_stats(self):
    self.load_config.log(LOG_LEVEL_INFO, 'Saving stats...')
    stats = {
        'total_rows': self.total_rows,
        'total_ids': self.total_ids
    }
    self.load_config.log(LOG_LEVEL_INFO, stats)

    data_source_directory = self.load_config.data_source_directory()
    file_utils.save_file(data_source_directory, 'stats.json', stats)

def upload(files, target):
    if len(files) == 0:
        return common_error("No files sent")

    filenames = []
    for file in files:
        # Check whether a file with the same name was previously uploaded
        if not FileUtils.file_exist(target, file.filename):
            destination = FileUtils.save_file(target, file)
            if not destination:
                return common_error("Error uploading file...")

            if not FileUtils.is_valid_file(destination):
                return common_error("File format is not supported...")

            logging.info("Uploaded filename: %s" % destination)
        else:
            destination = FileUtils.get_destination(target, file.filename)
            logging.info("Previously uploaded filename: %s" % destination)

        SessionHelper.add_uploaded_file_to_session(file.filename)
        filenames.append(file.filename)

    return json.dumps(filenames)

def count_rows(self):
    data_source_directory = self.load_config.data_source_directory()
    stats_file_name = self.data_source_stats_file_name()
    self.data_source_stats = file_utils.load_file(data_source_directory, stats_file_name)

    # Count only if there are no cached stats for this data source
    if self.data_source_stats is None or len(self.data_source_stats) == 0:
        self.count = 0
        self.data_source_batch = {}
        self.data_source.process_rows(0, self.count_row)

        self.load_config.log(LOG_LEVEL_INFO, 'Total rows:', self.count)
        self.load_config.log(LOG_LEVEL_INFO, 'Total ids:', len(self.data_source_batch))

        self.data_source_stats = {
            'row_count': self.count,
            'unique_ids': len(self.data_source_batch)
        }

        file_utils.save_file(data_source_directory, stats_file_name, self.data_source_stats)

def save_batch_info(self, start_index, row_count, unique_ids, data_source_batch_name):
    data_source_directory = self.load_config.data_source_directory()
    self.load_config.log(
        LOG_LEVEL_DEBUG, 'Finished processing batches, saving batch data...')

    row_count = row_count - start_index

    batch_data = {}  # self.get_data_source_batch_summary(data_source_batch_name)
    batch_data['start_index'] = start_index
    batch_data['row_count'] = row_count
    batch_data['unique_ids'] = unique_ids

    if not self.load_config.test_mode:
        file_utils.save_file(data_source_directory,
                             data_source_batch_name + '.json', batch_data)

    self.load_config.log(LOG_LEVEL_INFO, 'Saved batch data', data_source_batch_name)

def save_summary(self, ids_to_load):
    data_loader_batch_name = file_utils.batch_file_name_with_prefix(
        DATA_LOADER_BATCH_PREFIX)

    # Find skipped ids
    for _id in ids_to_load:
        if _id not in self.updated_ids and _id not in self.indexed_ids and _id not in self.failed_docs:
            doc = self.data_loader_batch[_id]
            self.add_to_failed_docs(_id, doc, 'Skipped')

    # Save failed docs
    if len(self.failed_docs) > 0:
        file_utils.save_file(self.failed_docs_directory,
                             data_loader_batch_name + '.json', self.failed_docs)

    # Save batch summary
    summary = {
        'indexed_ids': self.indexed_ids.keys(),
        'updated_ids': self.updated_ids.keys(),
    }
    file_utils.save_file(self.loaded_docs_directory,
                         data_loader_batch_name + '.json', summary)

    # Print summary
    self.load_config.log(
        LOG_LEVEL_INFO,
        '---------------------------------------------------------------------------------------------'
    )
    self.load_config.log(LOG_LEVEL_INFO,
                         self.load_config.server,
                         self.load_config.server_username,
                         self.index,
                         self.type,
                         ' Updated docs:', len(self.updated_ids) + len(self.indexed_ids),
                         ', Failed docs:', len(self.failed_docs))
    self.load_config.log(
        LOG_LEVEL_INFO,
        '---------------------------------------------------------------------------------------------'
    )

def upload(file, target):
    if not file.filename:
        return common_error("No file sent")

    if not FileUtils.is_valid_file(file.filename):
        return common_error("File extension is not supported...")

    destination = FileUtils.save_file(file, target)
    if not destination:
        return common_error("Error uploading file...")

    logging.debug("Uploaded filename: %s" % destination)
    session['uploaded_filename'] = file.filename

    return json.dumps(dict(filename=file.filename))

def upload(file, target):
    if not file.filename:
        return common_error("No file sent")

    destination = FileUtils.save_file(file, target)
    if not destination:
        return common_error("Error uploading file...")

    if not FileUtils.is_valid_file(destination):
        return common_error("File extension is not supported...")

    logging.debug("Uploaded filename: %s" % destination)
    session['uploaded_filename'] = file.filename

    return json.dumps(dict(filename=file.filename))

def upload(files, target):
    if len(files) == 0:
        return common_error("No files sent")

    filenames = []
    for file in files:
        destination = FileUtils.save_file(file, target)
        if not destination:
            return common_error("Error uploading file...")

        if not FileUtils.is_valid_file(destination):
            return common_error("File extension is not supported...")

        logging.info("Uploaded filename: %s" % destination)
        SessionHelper.add_uploaded_file_to_session(file.filename)
        filenames.append(file.filename)

    return json.dumps(filenames)

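# The upload handlers above delegate to a FileUtils helper that is not part of
# this listing. The sketch below is a minimal, assumed implementation inferred
# from how the helper is called: it assumes Flask/werkzeug FileStorage objects
# and a purely extension-based validity check; the ALLOWED_EXTENSIONS whitelist
# is an illustrative placeholder, not the project's actual rule. Note that the
# first handler calls save_file(target, file) while the later ones call
# save_file(file, target); this sketch follows the (file, target) order.
import os

from werkzeug.utils import secure_filename


class FileUtils(object):

    # Hypothetical whitelist of accepted extensions.
    ALLOWED_EXTENSIONS = {'.csv', '.txt', '.json'}

    @staticmethod
    def get_destination(target, filename):
        # Path the uploaded file would occupy inside the target directory.
        return os.path.join(target, secure_filename(filename))

    @staticmethod
    def file_exist(target, filename):
        # True if a file with this name was already saved to the target directory.
        return os.path.isfile(FileUtils.get_destination(target, filename))

    @staticmethod
    def save_file(file, target):
        # Persist the FileStorage object and return its path, or None on failure.
        try:
            if not os.path.isdir(target):
                os.makedirs(target)
            destination = FileUtils.get_destination(target, file.filename)
            file.save(destination)
            return destination
        except (OSError, IOError):
            return None

    @staticmethod
    def is_valid_file(path):
        # Accept only whitelisted extensions.
        _, ext = os.path.splitext(path)
        return ext.lower() in FileUtils.ALLOWED_EXTENSIONS
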
def save_data_source_batches(self, data_source_batches):
    data_source_directory = self.load_config.data_source_directory()
    file_utils.save_file(data_source_directory, DATA_SOURCE_BATCHES_FILE, data_source_batches)

def save_language_data(self, language, language_data):
    """ TODO Refactoring! """
    # iOS resource layout: each language has its own "<language>.lproj" directory.
    dir = self.directory + os.sep + "{}.lproj".format(language)
    file_utils.save_file(language_data, dir, self.files_regex)

def save_summary(self):
    data_source_directory = self.load_config.data_source_directory()
    data_source_summary = self.get_combined_data_source_summary()
    file_utils.save_file(data_source_directory, 'summary.json', data_source_summary)

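# Most of the loader and stats functions above persist their state through a
# file_utils module that is not included here. The following is a minimal sketch
# of what that module might look like, inferred from the call sites
# save_file(directory, file_name, data) and load_file(directory, file_name);
# JSON serialization and the empty-dict default on missing files are assumptions,
# not confirmed behaviour of the actual module. The two save_language_data
# variants call a different save_file(data, directory, files_regex) helper and
# are not covered by this sketch.
import json
import os


def save_file(directory, file_name, data):
    # Write the given data to <directory>/<file_name> as JSON, creating the
    # directory if it does not exist yet.
    if not os.path.isdir(directory):
        os.makedirs(directory)
    with open(os.path.join(directory, file_name), 'w') as out_file:
        json.dump(data, out_file, indent=4)


def load_file(directory, file_name):
    # Return the parsed JSON content of <directory>/<file_name>, or an empty
    # dict when the file does not exist.
    path = os.path.join(directory, file_name)
    if not os.path.isfile(path):
        return {}
    with open(path) as in_file:
        return json.load(in_file)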