Example #1
0
 def get_refine_sources(self):
     """Fetch the project listing from Refine.

     When the Refine API answers with a dict, ``self.refine_ok`` is set
     to True and the dict's "projects" entry is returned. Any other
     kind of response yields False (``self.refine_ok`` is left alone).
     """
     listing = RefineAPI().get_projects()
     if not isinstance(listing, dict):
         return False
     self.refine_ok = True
     return listing["projects"]
Example #2
0
 def get_refine_sources(self):
     """Fetch the project listing from Refine.

     When the Refine API answers with a dict, ``self.refine_ok`` is set
     to True and the dict's 'projects' entry is returned. Any other
     kind of response yields False (``self.refine_ok`` is left alone).
     """
     listing = RefineAPI().get_projects()
     if not isinstance(listing, dict):
         return False
     self.refine_ok = True
     return listing['projects']
Example #3
0
 def get_refine_schema(self, refine_project):
     """Load the column schema of a Refine project.

     Stores the API's ``source_id`` on ``self.source_id``, asks the API
     to prepare its model, and, when the resulting ``col_schema`` is
     not False, keeps it on ``self.refine_schema``.

     Args:
         refine_project: Refine project identifier handed to RefineAPI.

     Returns:
         True when a schema was stored on ``self.refine_schema``,
         False otherwise.
     """
     r_api = RefineAPI(refine_project)
     self.source_id = r_api.source_id
     r_api.prepare_model()
     # Read the schema once; the original also had a bare
     # ``r_api.col_schema`` statement here that did nothing.
     col_schema = r_api.col_schema
     if col_schema is False:
         return False
     self.refine_schema = col_schema
     return True
Example #4
0
 def get_refine_schema(self, refine_project):
     """Load the column schema of a Refine project.

     Stores the API's ``source_id`` on ``self.source_id``, asks the API
     to prepare its model, and, when the resulting ``col_schema`` is
     not False, keeps it on ``self.refine_schema``.

     Args:
         refine_project: Refine project identifier handed to RefineAPI.

     Returns:
         True when a schema was stored on ``self.refine_schema``,
         False otherwise.
     """
     r_api = RefineAPI(refine_project)
     self.source_id = r_api.source_id
     r_api.prepare_model()
     # Read the schema once; the original also had a bare
     # ``r_api.col_schema`` statement here that did nothing.
     col_schema = r_api.col_schema
     if col_schema is False:
         return False
     self.refine_schema = col_schema
     return True
Example #5
0
 def relate_refine_local_sources(self):
     """ Relates Refine sources with Sources already loaded from Refine

     Walks the Refine projects from most to least recently modified.
     For a project that already has an ImportSource record, the project
     is noted in ``self.refine_reloadable``; the record is flagged as
     not current when Refine's data is newer than the record, and its
     label is synced with the Refine project name. Projects without an
     ImportSource record are collected as candidates for a new import.

     Returns:
         A list of Refine metadata dicts (with 'id' added and 'created'
         / 'modified' replaced by parsed dates) for projects that are
         not imported yet. Empty when Refine returned no project list.
     """
     unused_refine_sources = []
     r_sources = self.get_refine_sources()
     if r_sources is False:
         return unused_refine_sources
     # Sort on a (timestamp, project id) tuple. Folding the id into the
     # fractional digits of a float, as was done before, loses the id to
     # float rounding, so projects modified in the same second collided
     # and were silently dropped.
     dated_projects = []
     for refine_project, ref_meta in r_sources.items():
         ref_mod_date = parse(ref_meta['modified'])
         unix_mod = time.mktime(ref_mod_date.timetuple())
         ref_meta['id'] = refine_project
         dated_projects.append(
             (unix_mod, str(refine_project), ref_mod_date, ref_meta))
     # most recently modified first; the key avoids comparing the dicts
     dated_projects.sort(key=lambda item: item[:2], reverse=True)
     r_api = RefineAPI()
     for _, _, ref_mod_date, ref_meta in dated_projects:
         refine_project = ref_meta['id']
         source_id = r_api.convert_refine_to_source_id(refine_project)
         try:
             p_source = ImportSource.objects.get(source_id=source_id)
         except ImportSource.DoesNotExist:
             # the source_id is not imported yet, so it's still usable
             # as a new import
             ref_meta['created'] = parse(ref_meta['created'])
             ref_meta['modified'] = ref_mod_date
             unused_refine_sources.append(ref_meta)
             continue
         self.refine_reloadable[source_id] = refine_project
         needs_save = False
         if ref_mod_date > p_source.updated:
             # Refine data updated since the last time the source was updated
             p_source.is_current = False
             needs_save = True
         if p_source.label != ref_meta['name']:
             # different name, change in our instance
             p_source.label = ref_meta['name']
             needs_save = True
         if needs_save:
             # one write even when both fields changed
             p_source.save()
     return unused_refine_sources
Example #6
0
 def relate_refine_local_sources(self):
     """ Relates Refine sources with Sources already loaded from Refine

     Walks the Refine projects from most to least recently modified.
     For a project that already has an ImportSource record, the project
     is noted in ``self.refine_reloadable``; the record is flagged as
     not current when Refine's data is newer than the record, and its
     label is synced with the Refine project name. Projects without an
     ImportSource record are collected as candidates for a new import.

     Returns:
         A list of Refine metadata dicts (with "id" added and "created"
         / "modified" replaced by parsed dates) for projects that are
         not imported yet. Empty when Refine returned no project list.
     """
     unused_refine_sources = []
     r_sources = self.get_refine_sources()
     if r_sources is False:
         return unused_refine_sources
     # Sort on a (timestamp, project id) tuple. Folding the id into the
     # fractional digits of a float, as was done before, loses the id to
     # float rounding, so projects modified in the same second collided
     # and were silently dropped.
     dated_projects = []
     for refine_project, ref_meta in r_sources.items():
         ref_mod_date = parse(ref_meta["modified"])
         unix_mod = time.mktime(ref_mod_date.timetuple())
         ref_meta["id"] = refine_project
         dated_projects.append(
             (unix_mod, str(refine_project), ref_mod_date, ref_meta)
         )
     # most recently modified first; the key avoids comparing the dicts
     dated_projects.sort(key=lambda item: item[:2], reverse=True)
     r_api = RefineAPI()
     for _, _, ref_mod_date, ref_meta in dated_projects:
         refine_project = ref_meta["id"]
         source_id = r_api.convert_refine_to_source_id(refine_project)
         try:
             p_source = ImportSource.objects.get(source_id=source_id)
         except ImportSource.DoesNotExist:
             # the source_id is not imported yet, so it's still usable
             # as a new import
             ref_meta["created"] = parse(ref_meta["created"])
             ref_meta["modified"] = ref_mod_date
             unused_refine_sources.append(ref_meta)
             continue
         self.refine_reloadable[source_id] = refine_project
         needs_save = False
         if ref_mod_date > p_source.updated:
             # Refine data updated since the last time the source was updated
             p_source.is_current = False
             needs_save = True
         if p_source.label != ref_meta["name"]:
             # different name, change in our instance
             p_source.label = ref_meta["name"]
             needs_save = True
         if needs_save:
             # one write even when both fields changed
             p_source.save()
     return unused_refine_sources
Example #7
0
 def get_project(self, project_uuid):
     """ Summarizes a project together with its import sources

         Returns False when no Manifest record has the given uuid.
         Otherwise returns a LastUpdatedOrderedDict with the manifest
         fields, the Project's edit status and short description
         (False and '' when there is no Project record), the project's
         ImportSource records, the Refine projects reported by
         relate_refine_local_sources(), the Refine base URL, and the
         refine_ok flag.
     """
     summary = LastUpdatedOrderedDict()
     try:
         manifest = Manifest.objects.get(uuid=project_uuid)
     except Manifest.DoesNotExist:
         return False
     summary['uuid'] = manifest.uuid
     summary['label'] = manifest.label
     summary['published'] = manifest.published
     summary['revised'] = manifest.revised
     try:
         project = Project.objects.get(uuid=manifest.uuid)
         summary['edit_status'] = project.edit_status
         summary['short_des'] = project.short_des
     except Project.DoesNotExist:
         summary['edit_status'] = False
         summary['short_des'] = ''
     # query Refine before the local sources, since that step lets us
     # know whether a source was updated
     refines = self.relate_refine_local_sources()
     sources = ImportSource.objects.filter(project_uuid=project_uuid)
     sources = sources.order_by('-updated')
     sources = self.note_unimport_ok(sources)
     sources = self.note_reloadable_sources(sources)
     summary['sources'] = sources
     summary['refines'] = refines
     summary['ref_baseurl'] = RefineAPI().get_project_base_url()
     summary['refine_ok'] = self.refine_ok
     return summary
Example #8
0
 def create_new_refine_source(self):
     """ Saves a record of a new Refine data source

         Does nothing when self.imp_source_obj is already set, or when
         Refine returns False for the project's metadata or size.
         Otherwise saves a new ImportSource in the default loading
         status and keeps it, its row count and its status on self.
     """
     if self.imp_source_obj is not False:
         # a source record already exists
         return
     api = RefineAPI(self.refine_project)
     metadata = api.get_metadata()
     dimensions = api.get_size()
     if metadata is False or dimensions is False:
         return
     loading_status = self.DEFAULT_LOADING_STATUS
     new_source = ImportSource()
     new_source.source_id = self.source_id
     new_source.project_uuid = self.project_uuid
     new_source.label = metadata['name']
     new_source.field_count = dimensions['field_count']
     new_source.row_count = dimensions['row_count']
     new_source.source_type = 'refine'
     new_source.is_current = True
     new_source.imp_status = loading_status
     new_source.save()
     self.imp_source_obj = new_source
     self.row_count = new_source.row_count
     self.imp_status = self.DEFAULT_LOADING_STATUS
Example #9
0
 def create_new_refine_source(self):
     """ Saves a record of a new Refine data source

         Does nothing when self.imp_source_obj is already set, or when
         Refine returns False for the project's metadata or size.
         Otherwise saves a new ImportSource in the default loading
         status and keeps it, its row count and its status on self.
     """
     if self.imp_source_obj is not False:
         # a source record already exists
         return
     api = RefineAPI(self.refine_project)
     metadata = api.get_metadata()
     dimensions = api.get_size()
     if metadata is False or dimensions is False:
         return
     loading_status = self.DEFAULT_LOADING_STATUS
     new_source = ImportSource()
     new_source.source_id = self.source_id
     new_source.project_uuid = self.project_uuid
     new_source.label = metadata['name']
     new_source.field_count = dimensions['field_count']
     new_source.row_count = dimensions['row_count']
     new_source.source_type = 'refine'
     new_source.is_current = True
     new_source.imp_status = loading_status
     new_source.save()
     self.imp_source_obj = new_source
     self.row_count = new_source.row_count
     self.imp_status = self.DEFAULT_LOADING_STATUS
Example #10
0
 def save_refine_records(self, refine_project, start=False):
     """ Loads records from Refine and saves their cells in the database

         Fetches one batch (beginning at `start`) when self.do_batch is
         set, otherwise all of the data, then bulk-creates one
         ImportCell per cell of every record.

         Returns the row number of the last record handled, or 0 when
         Refine returned no records.
     """
     last_row = 0
     api = RefineAPI(refine_project)
     self.source_id = api.source_id
     if not self.do_batch:
         # get all the data at once from Refine (not in batches)
         api.get_data_to_model()
     else:
         # get a batch of data
         api.get_data_batch_to_model(start)
     record_count = len(api.data)
     if not record_count > 0:
         return last_row
     print('Records to import: ' + str(record_count))
     new_cells = []
     for record in api.data:
         last_row = record['row_num']
         for field_num, cell_value in record['cells'].items():
             cell_text = str(cell_value)
             cell = ImportCell()
             cell.source_id = self.source_id
             cell.project_uuid = self.project_uuid
             cell.row_num = last_row
             cell.field_num = int(float(field_num))
             cell.rec_hash = ImportCell().make_rec_hash(
                 self.project_uuid, cell_text)
             cell.fl_uuid = False
             cell.l_uuid = False
             cell.cell_ok = True  # default to Import OK
             cell.record = cell_text
             new_cells.append(cell)
     # a single bulk insert instead of saving each cell on its own
     ImportCell.objects.bulk_create(new_cells)
     print('Done with: ' + str(last_row))
     return last_row
Example #11
0
    def import_refine_to_project(self, refine_project, project_uuid):
        """ Imports data from refine.
            The start of each batch is determined by a
            database call.

            This defaults to importing in batches!

            Returns the result of execute_import_refine_to_project()
            while records remain to be loaded; otherwise a dict that
            describes the import and whose 'done' key says whether
            work is finished.
        """
        self.refine_project = refine_project
        self.project_uuid = project_uuid
        r_api = RefineAPI(refine_project)
        self.source_id = r_api.source_id
        self.gen_obsolete_source_id()
        # gets metadata about the refine source as stored in the database
        self.get_refine_source_meta()
        if self.imp_status is False:
            # new import, create a new refine source metadata record
            self.create_new_refine_source()
        if self.imp_status == self.DEFAULT_LOADING_STATUS:
            # still have records to import from refine
            return self.execute_import_refine_to_project(
                refine_project, r_api)
        if self.DEFAULT_FIELD_UUID_ASSIGN in self.imp_status and self.make_uuids:
            # records are imported from refine, but still have uuids to assign
            finished = self.field_make_perserve_uuids()
            uuid_field = self.act_uuid_field
        else:
            # nothing left to do for this source
            finished = True
            uuid_field = self.imp_source_obj.field_count
        return {
            'refine': refine_project,
            'source_id': self.source_id,
            'row_count': self.row_count,
            'batch_size': r_api.row_request_limit,
            'start': self.row_count,
            'end': self.row_count,
            'field_count': self.imp_source_obj.field_count,
            'act_uuid_field': uuid_field,
            'make_uuids': self.make_uuids,
            'done': finished
        }
Example #12
0
 def save_refine_records(self,
                         refine_project,
                         start=False):
     """ Loads records from Refine and saves their cells in the database

         Fetches one batch (beginning at `start`) when self.do_batch is
         set, otherwise all of the data, then bulk-creates one
         ImportCell per cell of every record.

         Returns the row number of the last record handled, or 0 when
         Refine returned no records.
     """
     last_row = 0
     api = RefineAPI(refine_project)
     self.source_id = api.source_id
     if not self.do_batch:
         # get all the data at once from Refine (not in batches)
         api.get_data_to_model()
     else:
         # get a batch of data
         api.get_data_batch_to_model(start)
     record_count = len(api.data)
     if not record_count > 0:
         return last_row
     print('Records to import: ' + str(record_count))
     new_cells = []
     for record in api.data:
         last_row = record['row_num']
         for field_num, cell_value in record['cells'].items():
             cell_text = str(cell_value)
             cell = ImportCell()
             cell.source_id = self.source_id
             cell.project_uuid = self.project_uuid
             cell.row_num = last_row
             cell.field_num = int(float(field_num))
             cell.rec_hash = ImportCell().make_rec_hash(self.project_uuid,
                                                        cell_text)
             cell.fl_uuid = False
             cell.l_uuid = False
             cell.cell_ok = True  # default to Import OK
             cell.record = cell_text
             new_cells.append(cell)
     # a single bulk insert instead of saving each cell on its own
     ImportCell.objects.bulk_create(new_cells)
     print('Done with: ' + str(last_row))
     return last_row