Example #1
 def make_import_records_from_feature(self, row_num, feature):
     """ Saves a feature into the importer with properties as different
         'fields' in the importer
     """
     bulk_list = []
     if 'geometry' in feature:
         f_geo = feature['geometry']
     else:
         f_geo = False
     if 'properties' in feature:
         f_props = feature['properties']
         for prop_key, col_index in self.fields.items():
             record = None
             if prop_key in f_props:
                 record = f_props[prop_key]
                 guessed_data_type = self.get_guessed_prop_data_type(prop_key)
                 record = self.transform_validate_record(guessed_data_type,
                                                         record)
             elif prop_key == self.geometry_field_name:
                 # we have a geojson field
                 record = json.dumps(f_geo,
                                     ensure_ascii=False,
                                     indent=4)
             if record is not None:
                 imp_cell = ImportCell()
                 imp_cell.source_id = self.source_id
                 imp_cell.project_uuid = self.project_uuid
                 imp_cell.row_num = row_num
                 imp_cell.field_num = col_index
                 imp_cell.rec_hash = ImportCell().make_rec_hash(self.project_uuid,
                                                                str(record))
                 imp_cell.fl_uuid = False
                 imp_cell.l_uuid = False
                 imp_cell.cell_ok = True  # default to Import OK
                 imp_cell.record = str(record)
                 bulk_list.append(imp_cell)
     return bulk_list
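Note that this method only builds ImportCell objects; nothing is written to the database here, and the class context and imports (json, ImportCell) are omitted from the listing. A minimal sketch of how a caller might drive it over a whole GeoJSON FeatureCollection follows; the helper name import_geojson_features is hypothetical, and it assumes ImportCell is importable in the caller's scope.

 def import_geojson_features(importer, geojson_obj):
     """ Builds ImportCell rows for every feature, then saves them in one query """
     bulk_list = []
     # Row numbers start at 1, matching the row_num convention in these examples.
     for row_num, feature in enumerate(geojson_obj.get('features', []), 1):
         bulk_list += importer.make_import_records_from_feature(row_num, feature)
     if bulk_list:
         # Same bulk-save pattern used in the Refine and DataFrame examples below.
         ImportCell.objects.bulk_create(bulk_list)
     return len(bulk_list)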
Example #2
 def save_refine_records(self,
                         refine_project,
                         start=False):
     """ Loads a schema from refine, saves it in the database """
     row_num = 0
     r_api = RefineAPI(refine_project)
     self.source_id = r_api.source_id
     if self.do_batch:
         # get a batch of data
         r_api.get_data_batch_to_model(start)
     else:
         # get all the data at once from Refine (not in batches)
         r_api.get_data_to_model()
     if len(r_api.data) > 0:
         print('Records to import: ' + str(len(r_api.data)))
         bulk_list = []
         for record in r_api.data:
             row_num = record['row_num']
             for field_num, cell_value in record['cells'].items():
                 imp_cell = ImportCell()
                 imp_cell.source_id = self.source_id
                 imp_cell.project_uuid = self.project_uuid
                 imp_cell.row_num = row_num
                 imp_cell.field_num = int(float(field_num))
                 imp_cell.rec_hash = ImportCell().make_rec_hash(self.project_uuid,
                                                                str(cell_value))
                 imp_cell.fl_uuid = False
                 imp_cell.l_uuid = False
                 imp_cell.cell_ok = True  # default to Import OK
                 imp_cell.record = str(cell_value)
                 # imp_cell.save()
                 bulk_list.append(imp_cell)
         ImportCell.objects.bulk_create(bulk_list)
         bulk_list = None
         print('Done with: ' + str(row_num))
     return row_num
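This method returns the row number of the last record it saved, and an empty batch leaves that value at its initial 0, which suggests a simple driver loop for batched imports. The sketch below is an assumption about how the batching is meant to be used; the helper name, the idea that start is a row offset, and setting do_batch directly on the importer instance are not confirmed by the listing.

 def import_all_refine_batches(importer, refine_project):
     """ Requests successive batches from Refine until one comes back empty """
     importer.do_batch = True
     start = 0
     while True:
         last_row = importer.save_refine_records(refine_project, start=start)
         if last_row <= start:
             break  # empty batch; no rows beyond 'start' were saved
         start = last_row
     return start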
Example #3
 def get_record_hash(self, record):
     """ Gets the hash value for a record given the current project_uuid """
     imp_c = ImportCell()
     rec_hash = imp_c.make_rec_hash(self.project_uuid,
                                    record)
     return str(rec_hash)
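The hash helper pairs naturally with the rec_hash values that the other examples store on each ImportCell. One possible use is sketched below, under the assumption that rec_hash and project_uuid are filterable fields on the ImportCell model (they appear as attributes in these listings, but the model definition itself is not shown).

 def find_matching_cells(importer, record):
     """ Finds previously imported cells whose stored hash matches this record """
     rec_hash = importer.get_record_hash(str(record))
     return ImportCell.objects.filter(
         project_uuid=importer.project_uuid,
         rec_hash=rec_hash,
     )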
Example #4
 def get_record_hash(self, record):
     """ Gets the hash value for a record given the current project_uuid """
     imp_c = ImportCell()
     rec_hash = imp_c.make_rec_hash(self.project_uuid, record)
     return str(rec_hash)
Example #5
 def save_refine_records(self, refine_project, start=False):
     """ Loads a schema from refine, saves it in the database """
     row_num = 0
     r_api = RefineAPI(refine_project)
     self.source_id = r_api.source_id
     if self.do_batch:
         # get a batch of data
         r_api.get_data_batch_to_model(start)
     else:
         # get all the data at once from Refine (not in batches)
         r_api.get_data_to_model()
     if len(r_api.data) > 0:
         print('Records to import: ' + str(len(r_api.data)))
         bulk_list = []
         for record in r_api.data:
             row_num = record['row_num']
             for field_num, cell_value in record['cells'].items():
                 imp_cell = ImportCell()
                 imp_cell.source_id = self.source_id
                 imp_cell.project_uuid = self.project_uuid
                 imp_cell.row_num = row_num
                 imp_cell.field_num = int(float(field_num))
                 imp_cell.rec_hash = ImportCell().make_rec_hash(
                     self.project_uuid, str(cell_value))
                 imp_cell.fl_uuid = False
                 imp_cell.l_uuid = False
                 imp_cell.cell_ok = True  # default to Import OK
                 imp_cell.record = str(cell_value)
                 # imp_cell.save()
                 bulk_list.append(imp_cell)
         ImportCell.objects.bulk_create(bulk_list)
         bulk_list = None
         print('Done with: ' + str(row_num))
     return row_num
Example #6
    def save_dataframe_records(
        self,
        source_id,
        df,
        do_validate=True
    ):
        """ Loads a schema from refine, saves it in the database """
        self.source_id = source_id
        if df.empty:
            return None
        print('Importing {} records from: {}'.format(
                len(df.index),
                self.source_id
            )
        )

        cols = df.columns.tolist()
        for i, row in df.iterrows():
            row_num = i + 1
            bulk_list = []
            for field_num, col in enumerate(cols, 1):
                cell_value = row[col]
                if cell_value in [np.nan, None, 'nan']:
                    cell_value = ''
                cell_value = str(cell_value).strip()
                if cell_value == 'nan':
                    cell_value = ''
                imp_cell = ImportCell()
                imp_cell.source_id = self.source_id
                imp_cell.project_uuid = self.project_uuid
                imp_cell.row_num = row_num
                imp_cell.field_num = field_num
                imp_cell.rec_hash = ImportCell().make_rec_hash(
                    self.project_uuid,
                    cell_value
                )
                imp_cell.fl_uuid = False
                imp_cell.l_uuid = False
                imp_cell.cell_ok = True  # default to Import OK
                imp_cell.record = cell_value
                bulk_list.append(imp_cell)
            # Now bulk create the list of records in this row
            ImportCell.objects.bulk_create(bulk_list)
            bulk_list = None
        df_len = len(df.index)
        print('FINISHED import of {} records from: {}'.format(
                len(df.index),
                self.source_id
            )
        )
        if not do_validate:
            return row_num
        for field_num, col in enumerate(cols, 1):
            rec_count = ImportCell.objects.filter(
                source_id=self.source_id,
                field_num=field_num
            ).count()
            print('Imported {} [{}]: {}, expected {}'.format(
                    col,
                    field_num,
                    rec_count,
                    df_len
                )
            )
            assert rec_count == df_len
        return row_num
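This method expects an already-loaded pandas DataFrame (it checks df.empty and iterates with df.iterrows()), so a thin wrapper around pandas.read_csv is the natural entry point. The wrapper below is a sketch; the function name and the assumption that each CSV file maps to a single source_id are mine.

 import pandas as pd

 def import_csv(importer, source_id, csv_path):
     """ Loads a CSV into a DataFrame and hands it to the DataFrame importer """
     # dtype=str avoids pandas guessing numeric types and keeps leading zeros;
     # the importer stringifies and strips every cell value anyway.
     df = pd.read_csv(csv_path, dtype=str)
     return importer.save_dataframe_records(source_id, df)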
Example #7
 def make_import_records_from_feature(self, row_num, feature):
     """ Saves a feature into the importer with properties as different
         'fields' in the importer
     """
     bulk_list = []
     if 'geometry' in feature:
         f_geo = feature['geometry']
     else:
         f_geo = False
     if 'properties' in feature:
         f_props = feature['properties']
         for prop_key, col_index in self.fields.items():
             record = None
             if prop_key in f_props:
                 record = f_props[prop_key]
                 guessed_data_type = self.get_guessed_prop_data_type(
                     prop_key)
                 record = self.transform_validate_record(
                     guessed_data_type, record)
             elif prop_key == self.geometry_field_name:
                 # we have a geojson field
                 record = json.dumps(f_geo, ensure_ascii=False, indent=4)
             if record is not None:
                 imp_cell = ImportCell()
                 imp_cell.source_id = self.source_id
                 imp_cell.project_uuid = self.project_uuid
                 imp_cell.row_num = row_num
                 imp_cell.field_num = col_index
                 imp_cell.rec_hash = ImportCell().make_rec_hash(
                     self.project_uuid, str(record))
                 imp_cell.fl_uuid = False
                 imp_cell.l_uuid = False
                 imp_cell.cell_ok = True  # default to Import OK
                 imp_cell.record = str(record)
                 bulk_list.append(imp_cell)
     return bulk_list