def make_import_records_from_feature(self, row_num, feature):
    """Convert one GeoJSON feature into a list of ImportCell records.

    Each property named in self.fields becomes a separate importer
    'field' cell; the feature's geometry (if present) is serialized
    as JSON into the field named by self.geometry_field_name.

    :param row_num: row number assigned to every cell built here.
    :param feature: dict-like GeoJSON feature, with optional
        'geometry' and 'properties' members.
    :return: list of unsaved ImportCell objects (caller persists them).
    """
    bulk_list = []
    # Use .get() defaults so a feature missing 'properties' (or
    # 'geometry') cannot leave f_props unbound; the original only
    # assigned f_props inside an 'if' membership test.
    f_geo = feature.get('geometry', False)
    f_props = feature.get('properties', {})
    for prop_key, col_index in self.fields.items():
        record = None
        if prop_key in f_props:
            record = f_props[prop_key]
            guessed_data_type = self.get_guessed_prop_data_type(prop_key)
            record = self.transform_validate_record(
                guessed_data_type, record)
        elif prop_key == self.geometry_field_name:
            # we have a geojson field
            record = json.dumps(f_geo, ensure_ascii=False, indent=4)
        if record is not None:
            imp_cell = ImportCell()
            imp_cell.source_id = self.source_id
            imp_cell.project_uuid = self.project_uuid
            imp_cell.row_num = row_num
            imp_cell.field_num = col_index
            imp_cell.rec_hash = ImportCell().make_rec_hash(
                self.project_uuid, str(record))
            imp_cell.fl_uuid = False
            imp_cell.l_uuid = False
            imp_cell.cell_ok = True  # default to Import OK
            imp_cell.record = str(record)
            bulk_list.append(imp_cell)
    return bulk_list
def save_refine_records(self, refine_project, start=False):
    """Pull records from an Open Refine project and bulk-save them
    to the database as ImportCell rows.

    :param refine_project: identifier of the Refine project to read.
    :param start: batch start offset (only used when self.do_batch).
    :return: the last row number imported, or 0 when no data found.
    """
    row_num = 0
    r_api = RefineAPI(refine_project)
    self.source_id = r_api.source_id
    if self.do_batch:
        # fetch a single batch of data from Refine
        r_api.get_data_batch_to_model(start)
    else:
        # fetch all the data from Refine in one request
        r_api.get_data_to_model()
    if len(r_api.data) > 0:
        print('Records to import: ' + str(len(r_api.data)))
        pending_cells = []
        for rec in r_api.data:
            row_num = rec['row_num']
            for fnum, value in rec['cells'].items():
                cell = ImportCell()
                cell.source_id = self.source_id
                cell.project_uuid = self.project_uuid
                cell.row_num = row_num
                cell.field_num = int(float(fnum))
                cell.rec_hash = ImportCell().make_rec_hash(
                    self.project_uuid, str(value))
                cell.fl_uuid = False
                cell.l_uuid = False
                cell.cell_ok = True  # default to Import OK
                cell.record = str(value)
                pending_cells.append(cell)
        ImportCell.objects.bulk_create(pending_cells)
        pending_cells = None
        print('Done with: ' + str(row_num))
    return row_num
def get_record_hash(self, record):
    """Return the hash string for a record under the current
    project_uuid.

    :param record: the record value to hash.
    :return: string form of the ImportCell record hash.
    """
    hasher = ImportCell()
    return str(hasher.make_rec_hash(self.project_uuid, record))
def save_refine_records(self, refine_project, start=False):
    """Load data from an Open Refine project and store every cell
    in the database as an ImportCell.

    :param refine_project: the Refine project to read from.
    :param start: batch start offset, used only when self.do_batch.
    :return: the last imported row number, or 0 when no data.
    """
    row_num = 0
    r_api = RefineAPI(refine_project)
    self.source_id = r_api.source_id
    if self.do_batch:
        # get one batch of data from Refine
        r_api.get_data_batch_to_model(start)
    else:
        # get every record from Refine in a single pass
        r_api.get_data_to_model()

    def _as_cell(row, fnum, value):
        # Build one (unsaved) ImportCell for a single Refine cell.
        cell = ImportCell()
        cell.source_id = self.source_id
        cell.project_uuid = self.project_uuid
        cell.row_num = row
        cell.field_num = int(float(fnum))
        cell.rec_hash = ImportCell().make_rec_hash(
            self.project_uuid, str(value))
        cell.fl_uuid = False
        cell.l_uuid = False
        cell.cell_ok = True  # default to Import OK
        cell.record = str(value)
        return cell

    if len(r_api.data) > 0:
        print('Records to import: ' + str(len(r_api.data)))
        pending = []
        for record in r_api.data:
            row_num = record['row_num']
            pending.extend(
                _as_cell(row_num, fnum, value)
                for fnum, value in record['cells'].items()
            )
        ImportCell.objects.bulk_create(pending)
        pending = None
        print('Done with: ' + str(row_num))
    return row_num
def save_dataframe_records(
    self,
    source_id,
    df,
    do_valiate=True
):
    """Save every cell of a pandas DataFrame as an ImportCell row.

    NOTE: the misspelled parameter name 'do_valiate' (sic) is kept
    so existing keyword-argument callers do not break.

    :param source_id: identifier for the import source.
    :param df: pandas DataFrame of records to import.
    :param do_valiate: when True, verify the saved row count for
        each column matches the DataFrame length.
    :return: the last row number imported, or None for an empty frame.
    """
    self.source_id = source_id
    if df.empty:
        return None
    print('Importing {} records from: {}'.format(
            len(df.index),
            self.source_id
        )
    )
    cols = df.columns.tolist()
    row_num = 0
    for i, row in df.iterrows():
        # NOTE(review): assumes a default RangeIndex so that i + 1
        # yields sequential 1-based row numbers -- confirm callers
        # never pass a reindexed/filtered frame.
        row_num = i + 1
        bulk_list = []
        for field_num, col in enumerate(cols, 1):
            cell_value = row[col]
            # Robust missing-value check. The original tested
            # `cell_value in [np.nan, None, 'nan']`, which relies on
            # object identity for NaN and misses NaN instances that
            # are not the np.nan singleton (e.g. np.float64 NaN as
            # produced by pandas); those were only caught later by
            # the string comparison.
            if cell_value is None or (
                isinstance(cell_value, (float, np.floating))
                and np.isnan(cell_value)
            ):
                cell_value = ''
            cell_value = str(cell_value).strip()
            if cell_value == 'nan':
                cell_value = ''
            imp_cell = ImportCell()
            imp_cell.source_id = self.source_id
            imp_cell.project_uuid = self.project_uuid
            imp_cell.row_num = row_num
            imp_cell.field_num = field_num
            imp_cell.rec_hash = ImportCell().make_rec_hash(
                self.project_uuid,
                cell_value
            )
            imp_cell.fl_uuid = False
            imp_cell.l_uuid = False
            imp_cell.cell_ok = True  # default to Import OK
            imp_cell.record = cell_value
            bulk_list.append(imp_cell)
        # Now bulk create the list of records in this row
        ImportCell.objects.bulk_create(bulk_list)
        bulk_list = None
    df_len = len(df.index)
    print('FINISHED import of {} records from: {}'.format(
            len(df.index),
            self.source_id
        )
    )
    if not do_valiate:
        return row_num
    # Sanity-check: every field should have exactly one saved cell
    # per DataFrame row.
    for field_num, col in enumerate(cols, 1):
        rec_count = ImportCell.objects.filter(
            source_id=self.source_id,
            field_num=field_num
        ).count()
        print('Imported {} [{}]: {}, expected {}'.format(
                col,
                field_num,
                rec_count,
                df_len
            )
        )
        # NOTE(review): assert is stripped under `python -O`; kept
        # to preserve the AssertionError contract for callers.
        assert rec_count == df_len
    return row_num
def make_import_records_from_feature(self, row_num, feature):
    """Build ImportCell records for the properties of one GeoJSON
    feature.

    Properties listed in self.fields map to importer 'fields'; a
    geometry member, if any, is dumped as JSON into the field named
    by self.geometry_field_name.

    :param row_num: row number for all cells created here.
    :param feature: dict-like GeoJSON feature with optional
        'geometry' and 'properties' members.
    :return: list of unsaved ImportCell objects for the caller to
        bulk-save.
    """
    bulk_list = []
    # Default via .get() so a feature without 'properties' (or
    # 'geometry') can never hit an unbound f_props in the loop
    # below — the original bound f_props only conditionally.
    f_geo = feature.get('geometry', False)
    f_props = feature.get('properties', {})
    for prop_key, col_index in self.fields.items():
        record = None
        if prop_key in f_props:
            record = f_props[prop_key]
            guessed_data_type = self.get_guessed_prop_data_type(
                prop_key)
            record = self.transform_validate_record(
                guessed_data_type, record)
        elif prop_key == self.geometry_field_name:
            # we have a geojson field
            record = json.dumps(f_geo, ensure_ascii=False, indent=4)
        if record is not None:
            imp_cell = ImportCell()
            imp_cell.source_id = self.source_id
            imp_cell.project_uuid = self.project_uuid
            imp_cell.row_num = row_num
            imp_cell.field_num = col_index
            imp_cell.rec_hash = ImportCell().make_rec_hash(
                self.project_uuid, str(record))
            imp_cell.fl_uuid = False
            imp_cell.l_uuid = False
            imp_cell.cell_ok = True  # default to Import OK
            imp_cell.record = str(record)
            bulk_list.append(imp_cell)
    return bulk_list