def get_refine_sources(self):
    """ Gets project data from Refine """
    r_api = RefineAPI()
    r_sources = r_api.get_projects()
    if isinstance(r_sources, dict):
        self.refine_ok = True
        output = r_sources['projects']
    else:
        output = False
    return output
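# A minimal usage sketch (an assumption, not part of the original module)
# showing how the method above could list the Refine projects visible to
# this importer. The 'importer' argument is assumed to be an instance of
# the class these methods belong to.
def example_list_refine_projects(importer):
    """ Prints each Refine project id and name known to the importer """
    r_sources = importer.get_refine_sources()
    if r_sources is not False:
        for refine_project, ref_meta in r_sources.items():
            print(str(refine_project) + ': ' + ref_meta['name'])
    return r_sources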
def get_refine_schema(self, refine_project):
    """ Gets the column schema from Refine """
    output = False
    r_api = RefineAPI(refine_project)
    self.source_id = r_api.source_id
    r_api.prepare_model()
    if r_api.col_schema is not False:
        self.refine_schema = r_api.col_schema
        output = True
    return output
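# A minimal sketch (an assumption, not in the original module) of fetching
# the column schema for one Refine project. 'importer' is assumed to be an
# instance of this class, and the project id is a hypothetical example value.
def example_get_schema(importer):
    """ Fetches and prints the column schema for a Refine project """
    if importer.get_refine_schema('2304822234280'):  # hypothetical project id
        print('Column schema: ' + str(importer.refine_schema))
    else:
        print('No schema available; is Refine running?')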
def relate_refine_local_sources(self):
    """ Relates Refine sources with sources already loaded from Refine """
    unused_refine_sources = []
    r_sources = self.get_refine_sources()
    if r_sources is not False:
        # now sort these in reverse order of last updated
        date_proj_keyed = {}
        date_proj_keys = []
        for refine_project, ref_meta in r_sources.items():
            ref_mod_date = parse(ref_meta['modified'])
            unix_mod = time.mktime(ref_mod_date.timetuple())
            # keep the project_id in to ensure unique keys
            date_proj = str(unix_mod) + '00' + str(refine_project)
            date_proj = float(date_proj)
            ref_meta['id'] = refine_project
            date_proj_keyed[date_proj] = ref_meta
            date_proj_keys.append(date_proj)
        date_proj_keys.sort(reverse=True)
        r_api = RefineAPI()
        for date_proj in date_proj_keys:
            ref_meta = date_proj_keyed[date_proj]
            refine_project = ref_meta['id']
            source_id = r_api.convert_refine_to_source_id(refine_project)
            ref_created = parse(ref_meta['created'])
            ref_mod_date = parse(ref_meta['modified'])
            try:
                p_source = ImportSource.objects.get(source_id=source_id)
            except ImportSource.DoesNotExist:
                p_source = False
            if p_source is not False:
                self.refine_reloadable[source_id] = refine_project
                if ref_mod_date > p_source.updated:
                    # Refine data updated since the last time the source was updated
                    p_source.is_current = False
                    p_source.save()
                if p_source.label != ref_meta['name']:
                    # different name, change in our instance
                    p_source.label = ref_meta['name']
                    p_source.save()
            else:
                # the source_id is not imported yet, so it's still usable
                # as a new import
                ref_meta['created'] = ref_created
                ref_meta['modified'] = ref_mod_date
                unused_refine_sources.append(ref_meta)
    return unused_refine_sources
def get_project(self, project_uuid):
    """ Gets manifest and status metadata for a project, along with its
        import sources, checking Refine first for source updates
    """
    act_item = LastUpdatedOrderedDict()
    try:
        man_proj = Manifest.objects.get(uuid=project_uuid)
    except Manifest.DoesNotExist:
        act_item = False
    if act_item is not False:
        act_item['uuid'] = man_proj.uuid
        act_item['label'] = man_proj.label
        act_item['published'] = man_proj.published
        act_item['revised'] = man_proj.revised
        try:
            pobj = Project.objects.get(uuid=man_proj.uuid)
            act_item['edit_status'] = pobj.edit_status
            act_item['short_des'] = pobj.short_des
        except Project.DoesNotExist:
            act_item['edit_status'] = False
            act_item['short_des'] = ''
        # get sources from refine first, since it lets us know if updated
        refine_sources = self.relate_refine_local_sources()
        raw_p_sources = ImportSource.objects\
                                    .filter(project_uuid=project_uuid)\
                                    .order_by('-updated')
        raw_p_sources = self.note_unimport_ok(raw_p_sources)
        p_sources = self.note_reloadable_sources(raw_p_sources)
        act_item['sources'] = p_sources
        act_item['refines'] = refine_sources
        act_item['ref_baseurl'] = RefineAPI().get_project_base_url()
        act_item['refine_ok'] = self.refine_ok
    return act_item
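# Hedged sketch (an assumption, not in the original module) of using
# get_project() to inspect a project's import sources. 'importer' is
# assumed to be an instance of this class; the uuid would come from the
# projects Manifest.
def example_show_project_sources(importer, project_uuid):
    """ Prints the label, Refine status, and import sources for a project """
    act_item = importer.get_project(project_uuid)
    if act_item is not False:
        print('Project: ' + act_item['label'])
        print('Refine OK: ' + str(act_item['refine_ok']))
        for p_source in act_item['sources']:
            print('Source: ' + str(p_source))
    return act_item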
def create_new_refine_source(self):
    """ Saves a record of a new Refine data source """
    if self.imp_source_obj is False:
        r_api = RefineAPI(self.refine_project)
        meta = r_api.get_metadata()
        size = r_api.get_size()
        if meta is not False and size is not False:
            imp_s = ImportSource()
            imp_s.source_id = self.source_id
            imp_s.project_uuid = self.project_uuid
            imp_s.label = meta['name']
            imp_s.field_count = size['field_count']
            imp_s.row_count = size['row_count']
            imp_s.source_type = 'refine'
            imp_s.is_current = True
            imp_s.imp_status = self.DEFAULT_LOADING_STATUS
            imp_s.save()
            self.imp_source_obj = imp_s
            self.row_count = imp_s.row_count
            self.imp_status = self.DEFAULT_LOADING_STATUS
def save_refine_records(self, refine_project, start=False):
    """ Loads data records from Refine and saves them in the database """
    row_num = 0
    r_api = RefineAPI(refine_project)
    self.source_id = r_api.source_id
    if self.do_batch:
        # get a batch of data
        r_api.get_data_batch_to_model(start)
    else:
        # get all the data at once from Refine (not in batches)
        r_api.get_data_to_model()
    if len(r_api.data) > 0:
        print('Records to import: ' + str(len(r_api.data)))
        bulk_list = []
        for record in r_api.data:
            row_num = record['row_num']
            for field_num, cell_value in record['cells'].items():
                imp_cell = ImportCell()
                imp_cell.source_id = self.source_id
                imp_cell.project_uuid = self.project_uuid
                imp_cell.row_num = row_num
                imp_cell.field_num = int(float(field_num))
                imp_cell.rec_hash = ImportCell().make_rec_hash(self.project_uuid,
                                                               str(cell_value))
                imp_cell.fl_uuid = False
                imp_cell.l_uuid = False
                imp_cell.cell_ok = True  # default to Import OK
                imp_cell.record = str(cell_value)
                # saved via bulk_create below, not individual saves
                bulk_list.append(imp_cell)
        ImportCell.objects.bulk_create(bulk_list)
        bulk_list = None
        print('Done with: ' + str(row_num))
    return row_num
def import_refine_to_project(self, refine_project, project_uuid):
    """ Imports data from Refine. The start of each batch is determined
        by a database call. This defaults to importing in batches!
    """
    self.refine_project = refine_project
    self.project_uuid = project_uuid
    r_api = RefineAPI(refine_project)
    self.source_id = r_api.source_id
    self.gen_obsolete_source_id()
    # gets metadata about the refine source as stored in the database
    self.get_refine_source_meta()
    if self.imp_status is False:
        # new import, create a new refine source metadata record
        self.create_new_refine_source()
    if self.imp_status == self.DEFAULT_LOADING_STATUS:
        # still have records to import from refine
        output = self.execute_import_refine_to_project(refine_project,
                                                       r_api)
    elif self.DEFAULT_FIELD_UUID_ASSIGN in self.imp_status and self.make_uuids:
        # records are imported from refine, but still have uuids to assign
        done = self.field_make_perserve_uuids()
        output = {'refine': refine_project,
                  'source_id': self.source_id,
                  'row_count': self.row_count,
                  'batch_size': r_api.row_request_limit,
                  'start': self.row_count,
                  'end': self.row_count,
                  'field_count': self.imp_source_obj.field_count,
                  'act_uuid_field': self.act_uuid_field,
                  'make_uuids': self.make_uuids,
                  'done': done}
    else:
        output = {'refine': refine_project,
                  'source_id': self.source_id,
                  'row_count': self.row_count,
                  'batch_size': r_api.row_request_limit,
                  'start': self.row_count,
                  'end': self.row_count,
                  'field_count': self.imp_source_obj.field_count,
                  # fixed: was self.imp_source_obj.field_count, an apparent
                  # copy-paste slip from the line above
                  'act_uuid_field': self.act_uuid_field,
                  'make_uuids': self.make_uuids,
                  'done': True}
    return output
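# Hedged sketch (an assumption, not in the original module) of driving a
# batched import to completion. It assumes the dict returned on every path,
# including execute_import_refine_to_project(), carries the same 'done' and
# 'end' keys shown above; 'importer' is assumed to be an instance of this class.
def example_run_full_import(importer, refine_project, project_uuid):
    """ Repeatedly calls import_refine_to_project until it reports done """
    done = False
    output = None
    while not done:
        output = importer.import_refine_to_project(refine_project,
                                                   project_uuid)
        print('Imported rows up to: ' + str(output['end']))
        done = bool(output['done'])
    return output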