Example 1
 def writeThumbnailImage(self, filename, year, md5):
     try:
         scanDir = '%s/%s/%s' % (self.dzi, year, md5)
         channels = []
         for channel in os.listdir(scanDir):
             if os.path.isdir('%s%s%s' % (scanDir, os.sep, channel)):
                 channels.append(channel)
         outdir = '%s/%s' % (self.thumbnails, year)
         if not os.path.exists(outdir):
             os.makedirs(outdir)
         shutil.copyfile(
             '%s/%s/%s/%s/0/0_0.jpg' % (self.dzi, year, md5, channels[0]),
             '%s/%s.jpg' % (outdir, md5))
         thumbnail = '/thumbnails/%s/%s.jpg' % (urlquote(year),
                                                urlquote(md5))
         urls = []
         for channel in channels:
             urls.append('url=/data/%s/%s/%s/ImageProperties.xml' %
                         (year, md5, channel))
         return (thumbnail, '&'.join(urls))
     except:
         et, ev, tb = sys.exc_info()
         self.logger.error('got unexpected exception "%s"' % str(ev))
         self.logger.error('%s' %
                           str(traceback.format_exception(et, ev, tb)))
         self.sendMail('FAILURE Tiles: write thumbnail ERROR',
                       '%s\n' % str(traceback.format_exception(et, ev, tb)))
         os.remove(filename)
         return (None, None)
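
A minimal sketch of the same encoding step, using urllib.parse.quote with safe='' as a stand-in for urlquote (the function name and sample values below are made up for illustration):

    from urllib.parse import quote

    def thumbnail_url(year, md5):
        # Percent-encode each path component so reserved characters
        # (spaces, slashes, '%', ...) cannot break the URL structure.
        return '/thumbnails/%s/%s.jpg' % (quote(str(year), safe=''),
                                          quote(str(md5), safe=''))

    print(thumbnail_url('2020', 'ab cd/ef'))  # /thumbnails/2020/ab%20cd%2Fef.jpg
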
Example 2
    def getRowFilter(self, row):
        filters = []
        key = None
        for k in self.find_keys():
            if self.row_has_key(row, k):
                key = k
                break

        if key is None:
            raise ValueError("can't find appropriate key")
        for k in key.get('unique_columns'):
            filters.append("{k}={v}".format(k=urlquote(k), v=urlquote(row[k])))
        return filters
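
The filter-building idea above can be sketched on its own; quote stands in for urlquote, and the sample row and column names are hypothetical:

    from urllib.parse import quote

    def row_filters(row, unique_columns):
        # One "column=value" predicate per unique column, with both sides
        # percent-encoded so the filters can be joined into an ERMrest path.
        return ['%s=%s' % (quote(str(k), safe=''), quote(str(row[k]), safe=''))
                for k in unique_columns]

    print(row_filters({'Name': 'a b', 'ID': 7}, ['Name', 'ID']))
    # ['Name=a%20b', 'ID=7']
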
Example 3
 def getCatalogTable(asset_mapping, metadata_dict=None):
     schema_name, table_name = asset_mapping.get('target_table',
                                                 [None, None])
     if not (schema_name and table_name):
         metadata_dict_lower = {
             k.lower(): v
             for k, v in metadata_dict.items()
         }
         schema_name = metadata_dict_lower.get("schema")
         table_name = metadata_dict_lower.get("table")
     if not (schema_name and table_name):
         raise ValueError(
             "Unable to determine target catalog table for asset type.")
     return '%s:%s' % (urlquote(schema_name), urlquote(table_name))
Example 4
 def deleteFromHatrac(self):
     url = '/entity/Common:Delete_Hatrac/Hatrac_Deleted=FALSE/Processing_Status=in%20progress;Processing_Status::null::' 
     resp = self.catalog.get(url)
     resp.raise_for_status()
     files = resp.json()
     fileids = []
     for f in files:
         fileids.append((f['Hatrac_URI'], f['RID']))
             
     self.logger.debug('Deleting from hatrac %d files(s).' % (len(fileids))) 
     for hatrac_uri,rid in fileids:
         try:
             self.store.del_obj(hatrac_uri)
             self.logger.debug('SUCCEEDED deleted from hatrac the "%s" file.' % (hatrac_uri)) 
             columns = ["Hatrac_Deleted", "Processing_Status"]
             columns = ','.join([urlquote(col) for col in columns])
             url = '/attributegroup/Common:Delete_Hatrac/RID;%s' % (columns)
             obj = {'RID': rid,
                    'Hatrac_Deleted': True,
                    'Processing_Status': 'success'
                    }
             self.catalog.put(
                 url,
                 json=[obj]
             )
             self.logger.debug('SUCCEEDED updated the Common:Delete_Hatrac table entry for the Hatrac URL: "%s".' % (hatrac_uri)) 
         except Exception as e:
             et, ev, tb = sys.exc_info()
             self.logger.error('got exception "%s"' % str(ev))
             self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
             self.reportFailure(rid, str(e))
Example 5
    def deleteFromHatrac(self):
        url = '/entity/Common:Delete_Hatrac/Hatrac_Deleted=FALSE/Processing_Status=in%20progress;Processing_Status::null::'
        resp = self.catalog.get(url)
        resp.raise_for_status()
        files = resp.json()
        fileids = []
        for f in files:
            fileids.append((f['Hatrac_URI'], f['RID']))

        self.logger.debug('Deleting from hatrac %d files(s).' % (len(fileids)))
        for hatrac_uri, rid in fileids:
            try:
                self.store.del_obj(hatrac_uri)
                self.logger.debug(
                    'SUCCEEDED deleted from hatrac the "%s" file.' %
                    (hatrac_uri))
                columns = ["Hatrac_Deleted", "Processing_Status"]
                columns = ','.join([urlquote(col) for col in columns])
                url = '/attributegroup/Common:Delete_Hatrac/RID;%s' % (columns)
                obj = {
                    'RID': rid,
                    'Hatrac_Deleted': True,
                    'Processing_Status': 'success'
                }
                self.catalog.put(url, json=[obj])
                self.logger.debug(
                    'SUCCEEDED updated the Common:Delete_Hatrac table entry for the Hatrac URL: "%s".'
                    % (hatrac_uri))
            except Exception as e:
                et, ev, tb = sys.exc_info()
                self.logger.error('got exception "%s"' % str(ev))
                self.logger.error('%s' %
                                  str(traceback.format_exception(et, ev, tb)))
                self.reportFailure(rid, str(e))
Example 6
    def exists(self, tablename):
        # check if table exists in ermrest catalog
        assert isinstance(self._ermrest_catalog, ErmrestCatalog)
        sname, tname = self._parse_table_name(tablename)

        try:
            path = '/schema/%s/table/%s' % (urlquote(sname), urlquote(tname))
            r = self._ermrest_catalog.get(path)
            r.raise_for_status()
            resp = r.json()
            return resp is not None
        except HTTPError as e:
            if e.response.status_code == 404:
                return False
            else:
                raise e
Example 7
 def put_row_update(self, update_row):
     self.catalog.put('%s;%s' % (self.unit.put_update_baseurl, ','.join([
         urlquote(col, safe='')
         for col in list(update_row.keys()) if col not in ['ID', 'RID']
     ])),
                      json=[update_row])
     sys.stderr.write('\nupdated in ERMrest: %s' %
                      json.dumps(update_row, indent=2))
Example 8
            def get_data():
                r = self.catalog.get(
                    '/entity/CFDE:%s@sort(RID)?limit=%d' % (
                        urlquote(resource['name']),
                        self.batch_size,
                    ))
                rows = r.json()
                yield rows

                while rows:
                    last = rows[-1]['RID']
                    r = self.catalog.get(
                        '/entity/CFDE:%s@sort(RID)@after(%s)?limit=%d' % (
                            urlquote(resource['name']),
                            urlquote(last),
                            self.batch_size,
                    ))
                    rows = r.json()
                    yield rows
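
A rough, self-contained sketch of the same keyset-paging pattern, assuming a fetch(path) callable that plays the role of self.catalog.get(path).json() (the helper name and default batch size are illustrative):

    from urllib.parse import quote

    def paged_entities(fetch, table, batch_size=1000):
        # fetch(path) stands in for catalog.get(path).json() and is assumed
        # to return a list of row dicts sorted as requested.
        path = '/entity/CFDE:%s@sort(RID)?limit=%d' % (quote(table, safe=''), batch_size)
        rows = fetch(path)
        while rows:
            yield rows
            last = rows[-1]['RID']
            path = '/entity/CFDE:%s@sort(RID)@after(%s)?limit=%d' % (
                quote(table, safe=''), quote(str(last), safe=''), batch_size)
            rows = fetch(path)
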
Example 9
 def _urlEncodeMetadata(self, safe_overrides=None):
     urlencoded = dict()
     if not safe_overrides:
         safe_overrides = dict()
     for k, v in self.metadata.items():
         if k.endswith("_urlencoded"):
             continue
         urlencoded[k + "_urlencoded"] = urlquote(str(v),
                                                  safe_overrides.get(k, ""))
     self._updateFileMetadata(urlencoded)
Example 10
 def _urlencode_envars(self, safe_overrides=None):
     urlencoded = dict()
     if not safe_overrides:
         safe_overrides = dict()
     for k, v in self.envars.items():
         if k.endswith("_urlencoded"):
             continue
         urlencoded[k + "_urlencoded"] = urlquote(str(v),
                                                  safe_overrides.get(k, ""))
     self.envars.update(urlencoded)
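
The *_urlencoded companion-key convention used here can be sketched generically; quote stands in for urlquote, and the sample dict is made up:

    from urllib.parse import quote

    def add_urlencoded(values, safe_overrides=None):
        # Add a "<key>_urlencoded" companion for every key, holding the
        # percent-encoded value; keys that are already companions are skipped.
        safe_overrides = safe_overrides or {}
        encoded = {}
        for k, v in values.items():
            if k.endswith('_urlencoded'):
                continue
            encoded[k + '_urlencoded'] = quote(str(v), safe=safe_overrides.get(k, ''))
        values.update(encoded)
        return values

    print(add_urlencoded({'path': 'a/b c'}, safe_overrides={'path': '/'}))
    # {'path': 'a/b c', 'path_urlencoded': 'a/b%20c'}
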
Example 11
    def cleanup_restored_catalog(self):
        # cleanup restore state markers
        logging.info("Cleaning up restore state...")
        dst_model = self.dst_catalog.getCatalogModel()
        for sname, schema in dst_model.schemas.items():
            for tname, table in schema.tables.items():
                annotation_uri = "/schema/%s/table/%s/annotation/%s" % (
                    urlquote(sname), urlquote(tname),
                    urlquote(self.RESTORE_STATE_URL))
                try:
                    self.dst_catalog.delete(annotation_uri)
                except Exception as e:
                    logging.warning(
                        "Unable to cleanup restore state marker annotation %s: %s"
                        % (annotation_uri, format_exception(e)))
                    continue

        # truncate restore history
        if self.truncate_after:
            logging.info("Truncating restore history...")
            snaptime = self.dst_catalog.get("/").json()["snaptime"]
            self.dst_catalog.delete("/history/,%s" % urlquote(snaptime))
Example 12
 def on_actionRefresh_triggered(self):
     if not self.identity:
         self.updateStatus("Unable to get worklist -- not logged in.")
         return
     qApp.setOverrideCursor(Qt.WaitCursor)
     self.disableControls()
     self.updateStatus("Refreshing worklist...")
     queryTask = CatalogQueryTask(self.catalog)
     queryTask.status_update_signal.connect(self.onRefreshResult)
     if self.is_curator() and self.curator_mode:
         queryTask.query(WORKLIST_CURATOR_QUERY)
     else:
         queryTask.query(WORKLIST_QUERY % urlquote(self.identity, ""))
Example 13
    def _catalogRecordUpdate(self, catalog_table, old_row, new_row):
        """

        :param catalog_table:
        :param new_row:
        :param old_row:
        :return:
        """
        if self.cancelled:
            return None

        try:
            keys = sorted(list(new_row.keys()))
            old_keys = sorted(list(old_row.keys()))
            if keys != old_keys:
                raise RuntimeError(
                    "Cannot update catalog - "
                    "new row column list and old row column list do not match: New: %s != Old: %s"
                    % (keys, old_keys))
            combined_row = {
                'o%d' % i: old_row[keys[i]]
                for i in range(len(keys))
            }
            combined_row.update(
                {'n%d' % i: new_row[keys[i]]
                 for i in range(len(keys))})
            update_uri = '/attributegroup/%s/%s;%s' % (catalog_table, ','.join(
                ["o%d:=%s" % (i, urlquote(keys[i]))
                 for i in range(len(keys))]), ','.join([
                     "n%d:=%s" % (i, urlquote(keys[i]))
                     for i in range(len(keys))
                 ]))
            logging.debug(
                "Attempting catalog record update [%s] with data: %s" %
                (update_uri, json.dumps(combined_row)))
            return self.catalog.put(update_uri, json=[combined_row]).json()
        except:
            (etype, value, traceback) = sys.exc_info()
            raise CatalogUpdateError(format_exception(value))
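
A compact sketch of the same attributegroup update-URI construction, assuming quote as a stand-in for urlquote; the table and column names are hypothetical:

    from urllib.parse import quote

    def build_update(catalog_table, old_row, new_row):
        # Alias every column twice -- oN for the old (key) values and nN for
        # the new values -- matching ERMrest's /attributegroup/<keys>;<targets> form.
        keys = sorted(new_row)
        combined = {'o%d' % i: old_row[k] for i, k in enumerate(keys)}
        combined.update({'n%d' % i: new_row[k] for i, k in enumerate(keys)})
        uri = '/attributegroup/%s/%s;%s' % (
            catalog_table,
            ','.join('o%d:=%s' % (i, quote(k, safe='')) for i, k in enumerate(keys)),
            ','.join('n%d:=%s' % (i, quote(k, safe='')) for i, k in enumerate(keys)))
        return uri, [combined]

    uri, payload = build_update('Common:Foo', {'RID': '1-X', 'Status': 'old'},
                                {'RID': '1-X', 'Status': 'new'})
    print(uri)  # /attributegroup/Common:Foo/o0:=RID,o1:=Status;n0:=RID,n1:=Status
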
Example 14
 def writeThumbnailImage(self, filename, year, md5):
     try:
         scanDir='%s/%s/%s' % (self.dzi, year, md5)
         channels = []
         for channel in os.listdir(scanDir):
             if os.path.isdir('%s%s%s' % (scanDir, os.sep, channel)):
                channels.append( channel)
         outdir = '%s/%s' % (self.thumbnails, year)
         if not os.path.exists(outdir):
             os.makedirs(outdir)
         shutil.copyfile('%s/%s/%s/%s/0/0_0.jpg' % (self.dzi, year, md5, channels[0]), '%s/%s.jpg' % (outdir, md5))
         thumbnail = '/thumbnails/%s/%s.jpg' % (urlquote(year), urlquote(md5))
         urls = []
         for channel in channels:
             urls.append('url=/data/%s/%s/%s/ImageProperties.xml' % (year, md5, channel))
         return (thumbnail, '&'.join(urls))
     except:
         et, ev, tb = sys.exc_info()
         self.logger.error('got unexpected exception "%s"' % str(ev))
         self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
         self.sendMail('FAILURE Tiles: write thumbnail ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
         os.remove(filename)
         return (None, None)
Example 15
    def _uploadAsset(self,
                     file_path,
                     asset_mapping,
                     match_groupdict,
                     callback=None):

        # 1. Populate metadata by querying the catalog
        self._queryFileMetadata(file_path, asset_mapping, match_groupdict)

        # 2. If "create_record_before_upload" specified in asset_mapping, check for an existing record, creating a new
        #    one if necessary. Otherwise delay this logic until after the file upload.
        record = None
        if stob(asset_mapping.get("create_record_before_upload", False)):
            record = self._getFileRecord(asset_mapping)

        # 3. Perform the Hatrac upload
        self._getFileHatracMetadata(asset_mapping)
        hatrac_options = asset_mapping.get("hatrac_options", {})
        versioned_uri = \
            self._hatracUpload(self.metadata["URI"],
                               file_path,
                               md5=self.metadata.get("md5_base64"),
                               sha256=self.metadata.get("sha256_base64"),
                               content_type=self.guessContentType(file_path),
                               content_disposition=self.metadata.get("content-disposition"),
                               chunked=True,
                               create_parents=stob(hatrac_options.get("create_parents", True)),
                               allow_versioning=stob(hatrac_options.get("allow_versioning", True)),
                               callback=callback)
        logging.debug("Hatrac upload successful. Result object URI: %s" %
                      versioned_uri)
        if stob(hatrac_options.get("versioned_uris", True)):
            self.metadata["URI"] = versioned_uri
        else:
            self.metadata["URI"] = versioned_uri.rsplit(":")[0]
        self.metadata["URI_urlencoded"] = urlquote(self.metadata["URI"])

        # 3. Check for an existing record and create a new one if necessary
        if not record:
            record = self._getFileRecord(asset_mapping)

        # 4. Update an existing record, if necessary
        column_map = asset_mapping.get("column_map", {})
        updated_record = self.interpolateDict(self.metadata, column_map)
        if updated_record != record:
            logging.info("Updating catalog for file [%s]" %
                         self.getFileDisplayName(file_path))
            self._catalogRecordUpdate(self.metadata['target_table'], record,
                                      updated_record)
Example 16
    def provision(self):
        if 'CFDE' not in self.cat_model_root.schemas:
            # blindly load the whole model on an apparently empty catalog
            self.catalog.post('/schema',
                              json=self.model_doc).raise_for_status()
        else:
            # do some naively idempotent model definitions on existing catalog
            # adding missing tables and missing columns
            need_tables = []
            need_columns = []
            hazard_fkeys = {}
            for ntable in self.doc_cfde_schema.tables.values():
                table = self.cat_cfde_schema.tables.get(ntable.name)
                if table is not None:
                    for ncolumn in ntable.column_definitions:
                        column = table.column_definitions.elements.get(
                            ncolumn.name)
                        if column is not None:
                            # TODO: check existing columns for compatibility?
                            pass
                        else:
                            cdoc = ncolumn.prejson()
                            cdoc.update({
                                'table_name': table.name,
                                'nullok': True
                            })
                            need_columns.append(cdoc)
                    # TODO: check existing table keys/foreign keys for compatibility?
                else:
                    tdoc = ntable.prejson()
                    tdoc['schema_name'] = 'CFDE'
                    need_tables.append(tdoc)

            if need_tables:
                logger.debug("Added tables %s" %
                             ([tdoc['table_name'] for tdoc in need_tables]))
                self.catalog.post('/schema',
                                  json=need_tables).raise_for_status()

            for cdoc in need_columns:
                self.catalog.post('/schema/CFDE/table/%s/column' %
                                  urlquote(cdoc['table_name']),
                                  json=cdoc).raise_for_status()
                logger.debug("Added column %s.%s" %
                             (cdoc['table_name'], cdoc['name']))

        self.get_model()
        self.provision_dataset_ancestor_tables()
        self.provision_denorm_tables()
Example 17
 def metadata_query_url(image_id):
     """Build ERMrest query URL returning metadata record needed by class."""
     return (
         '/attributegroup'
         '/I:=Zebrafish:Image/ID=%(id)s;RID=%(id)s'
         '/AS1:=left(I:Alignment%%20Standard)=(Zebrafish:Alignment%%20Standard:RID)'
         '/ASI1:=left(AS1:Image)=(Zebrafish:Image:RID)'
         '/AS2:=left(ASI1:Alignment%%20Standard)=(Zebrafish:Alignment%%20Standard:RID)'
         '/ASI2:=left(AS2:Image)=(Zebrafish:Image:RID)'
         '/$I'
         '/*'
         ';ASI1_obj:=array(ASI1:*)'
         ',AS1_obj:=array(AS1:*)'
         ',ASI2_obj:=array(ASI2:*)'
         ',AS2_obj:=array(AS2:*)') % {
             'id': urlquote(image_id),
         }
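
Note the doubled percent signs (e.g. %%20) in templates like this: they survive the %-substitution as a literal %20 in the final URL. A small illustration with a simplified, hypothetical path:

    from urllib.parse import quote

    # A literal percent sign in the template must be doubled (%%20) so the
    # later %-substitution step leaves the encoded space intact.
    template = '/entity/Zebrafish:Image/ID=%(id)s;RID=%(id)s/Alignment%%20Standard'
    print(template % {'id': quote('abc 123', safe='')})
    # /entity/Zebrafish:Image/ID=abc%20123;RID=abc%20123/Alignment%20Standard
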
Example 18
    def deleteFromYouTube(self):
        url = '/entity/Common:Delete_Youtube/Youtube_Deleted=FALSE/Processing_Status=in%20progress;Processing_Status::null::'
        resp = self.catalog.get(url)
        resp.raise_for_status()
        files = resp.json()
        fileids = []
        for f in files:
            fileids.append((f['YouTube_URI'], f['RID']))

        self.logger.debug('Deleting from YouTube %d videos(s).' %
                          (len(fileids)))
        for youtube_uri, rid in fileids:
            try:
                youtube_deleted = self.youtube_delete(youtube_uri)
                if youtube_deleted == True:
                    self.logger.debug(
                        'SUCCEEDED deleted from YouTube the video with the URL: "%s".'
                        % (youtube_uri))
                    columns = ["Youtube_Deleted", "Processing_Status"]
                    columns = ','.join([urlquote(col) for col in columns])
                    url = '/attributegroup/Common:Delete_Youtube/RID;%s' % (
                        columns)
                    obj = {
                        'RID': rid,
                        'Youtube_Deleted': True,
                        'Processing_Status': 'success'
                    }
                    self.catalog.put(url, json=[obj])
                    self.logger.debug(
                        'SUCCEEDED updated the Common:Delete_Youtube table entry for the YouTube URL: "%s".'
                        % (youtube_uri))
                else:
                    self.logger.debug(
                        'Failure in deleting from YouTube the video with the URL: "%s".'
                        % (youtube_uri))
                    self.sendMail(
                        'FAILURE Delete YouTube: YouTube Failure',
                        'The video "%s" could not be deleted from Youtube.' %
                        youtube_uri)
                    self.reportFailure(rid, 'YouTube Failure')
            except Exception as e:
                et, ev, tb = sys.exc_info()
                self.logger.error('got exception "%s"' % str(ev))
                self.logger.error('%s' %
                                  str(traceback.format_exception(et, ev, tb)))
                self.reportFailure(rid, str(e))
Example 19
 def updateAttributes (self, path, rid, columns, row):
     """
     Update the ermrest attributes with the row values.
     """
     try:
         columns = ','.join([urlquote(col) for col in columns])
         url = '/attributegroup/%s/RID;%s' % (path, columns)
         resp = self.catalog.put(
             url,
             json=[row]
         )
         resp.raise_for_status()
         self.logger.debug('SUCCEEDED updated the table "%s" for the RID "%s"  with "%s".' % (path, rid, json.dumps(row, indent=4))) 
     except:
         et, ev, tb = sys.exc_info()
         self.logger.error('got exception "%s"' % str(ev))
         self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
         self.sendMail('FAILURE Tiles: reportFailure ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
Example 20
    def provision(self):
        if 'CFDE' not in self.model_root.schemas:
            # blindly load the whole model on an apparently empty catalog
            self.catalog.post('/schema',
                              json=self.model_doc).raise_for_status()
        else:
            # do some naively idempotent model definitions on existing catalog
            # adding missing tables and missing columns
            need_tables = []
            need_columns = []
            hazard_fkeys = {}
            for tname, tdoc in self.model_doc['schemas']['CFDE'][
                    'tables'].items():
                if tname in self.cfde_schema.tables:
                    table = self.cfde_schema.tables[tname]
                    for cdoc in tdoc['column_definitions']:
                        if cdoc['name'] in table.column_definitions.elements:
                            column = table.column_definitions.elements[
                                cdoc['name']]
                            # TODO: check existing columns for compatibility?
                        else:
                            cdoc.update({'table_name': tname, 'nullok': True})
                            need_columns.append(cdoc)
                    # TODO: check existing table keys/foreign keys for compatibility?
                else:
                    tdoc['schema_name'] = 'CFDE'
                    need_tables.append(tdoc)

            if need_tables:
                if self.verbose:
                    print("Added tables %s" %
                          ([tdoc['table_name'] for tdoc in need_tables]))
                self.catalog.post('/schema',
                                  json=need_tables).raise_for_status()

            for cdoc in need_columns:
                self.catalog.post('/schema/CFDE/table/%s/column' %
                                  urlquote(cdoc['table_name']),
                                  json=cdoc).raise_for_status()
                if self.verbose:
                    print("Added column %s.%s" %
                          (cdoc['table_name'], cdoc['name']))

        self.get_model()
Example 21
 def updateAttributes(self, path, rid, columns, row):
     """
     Update the ermrest attributes with the row values.
     """
     try:
         columns = ','.join([urlquote(col) for col in columns])
         url = '/attributegroup/%s/RID;%s' % (path, columns)
         resp = self.catalog.put(url, json=[row])
         resp.raise_for_status()
         self.logger.debug(
             'SUCCEEDED updated the table "%s" for the RID "%s"  with "%s".'
             % (path, rid, json.dumps(row, indent=4)))
     except:
         et, ev, tb = sys.exc_info()
         self.logger.error('got exception "%s"' % str(ev))
         self.logger.error('%s' %
                           str(traceback.format_exception(et, ev, tb)))
         self.sendMail('FAILURE Tiles: reportFailure ERROR',
                       '%s\n' % str(traceback.format_exception(et, ev, tb)))
Example 22
 def load_data_files(self):
     tables_doc = self.model_doc['schemas']['CFDE']['tables']
     for tname in self.data_tnames_topo_sorted():
         # we are doing a clean load of data in fkey dependency order
         table = self.cat_model_root.table("CFDE", tname)
         resource = tables_doc[tname]["annotations"].get(
             self.resource_tag, {})
         if "path" in resource:
             fname = "%s/%s" % (self.dirname, resource["path"])
             with open(fname, "r") as f:
                 # translate TSV to python dicts
                 reader = csv.reader(f, delimiter="\t")
                 row2dict = self.make_row2dict(table, next(reader))
                 entity_url = "/entity/CFDE:%s" % urlquote(table.name)
                 batch_size = 50000  # TODO: Should this be configurable?
                 # Batch catalog ingests; too-large ingests will hang and fail
                 # Largest known CFDE ingest has file with >5m rows
                 batch = []
                 for raw_row in reader:
                     # Collect full batch, then post at once
                     batch.append(row2dict(raw_row))
                     if len(batch) >= batch_size:
                         try:
                             self.catalog.post(entity_url, json=batch)
                             logger.debug("Batch of rows for %s loaded" %
                                          table.name)
                         except Exception as e:
                             logger.error("Table %s data load FAILED from "
                                          "%s: %s" % (table.name, fname, e))
                             raise
                         else:
                             batch.clear()
                 # After reader exhausted, ingest final batch
                 if len(batch) > 0:
                     try:
                         self.catalog.post(entity_url, json=batch)
                     except Exception as e:
                         logger.error("Table %s data load FAILED from "
                                      "%s: %s" % (table.name, fname, e))
                         raise
                 logger.info("All data for table %s loaded from %s." %
                             (table.name, fname))
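
The batching loop above can be reduced to a generic sketch, assuming a post(url, json=...) callable in place of self.catalog.post:

    def post_in_batches(post, entity_url, rows, batch_size=50000):
        # post(url, json=batch) stands in for catalog.post(...); rows is any
        # iterable of row dicts.  Full batches are sent as they fill up and
        # the final partial batch is flushed at the end.
        batch = []
        for row in rows:
            batch.append(row)
            if len(batch) >= batch_size:
                post(entity_url, json=batch)
                batch.clear()
        if batch:
            post(entity_url, json=batch)
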
Example 23
 def reportFailure(self, rid, error_message):
     """
         Update the Delete_Youtube table with the failure result.
     """
     try:
         columns = ["Processing_Status"]
         columns = ','.join([urlquote(col) for col in columns])
         url = '/attributegroup/Common:Delete_Youtube/RID;%s' % (columns)
         obj = {'RID': rid, 'Processing_Status': '%s' % error_message}
         self.catalog.put(url, json=[obj])
         self.logger.debug(
             'SUCCEEDED updated the Delete_Youtube table for the RID "%s"  with the Processing_Status result "%s".'
             % (rid, error_message))
     except:
         et, ev, tb = sys.exc_info()
         self.logger.error('got exception "%s"' % str(ev))
         self.logger.error('%s' %
                           str(traceback.format_exception(et, ev, tb)))
         self.sendMail('FAILURE Delete YouTube: reportFailure ERROR',
                       '%s\n' % str(traceback.format_exception(et, ev, tb)))
Example 24
 def load_data_files(self):
     tables_doc = self.model_doc['schemas']['CFDE']['tables']
     for tname in self.data_tnames_topo_sorted():
         # we are doing a clean load of data in fkey dependency order
         table = self.model_root.table("CFDE", tname)
         resource = tables_doc[tname]["annotations"].get(
             self.resource_tag, {})
         if "path" in resource:
             fname = "%s/%s" % (self.dirname, resource["path"])
             with open(fname, "r") as f:
                 # translate TSV to python dicts
                 reader = csv.reader(f, delimiter="\t")
                 raw_rows = list(reader)
                 row2dict = self.make_row2dict(table, raw_rows[0])
                 dict_rows = [row2dict(row) for row in raw_rows[1:]]
                 self.catalog.post("/entity/CFDE:%s" % urlquote(table.name),
                                   json=dict_rows)
                 if self.verbose:
                     print("Table %s data loaded from %s." %
                           (table.name, fname))
Example 25
 def reportFailure(self, rid, error_message):
     """
         Update the Delete_Youtube table with the failure result.
     """
     try:
         columns = ["Processing_Status"]
         columns = ','.join([urlquote(col) for col in columns])
         url = '/attributegroup/Common:Delete_Youtube/RID;%s' % (columns)
         obj = {'RID': rid,
                'Processing_Status': '%s' % error_message
                }
         self.catalog.put(
             url,
             json=[obj]
         )
         self.logger.debug('SUCCEEDED updated the Delete_Youtube table for the RID "%s"  with the Processing_Status result "%s".' % (rid, error_message)) 
     except:
         et, ev, tb = sys.exc_info()
         self.logger.error('got exception "%s"' % str(ev))
         self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
         self.sendMail('FAILURE Delete YouTube: reportFailure ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
Example 26
 def reportFailure(self, accessionId, error_message):
     """
         Update the Slide_Video table with the YouTube Upload failure result.
     """
     try:
         columns = ["Processing_Status"]
         columns = ','.join([urlquote(col) for col in columns])
         url = '/attributegroup/Immunofluorescence:Slide_Video/Accession_ID;%s' % (columns)
         obj = {'Accession_ID': accessionId,
                "Processing_Status": '%s' % error_message
                }
         self.catalog.put(
             url,
             json=[obj]
         )
         self.logger.debug('SUCCEEDED updated the Slide_Video table for the video Accession_ID "%s"  with the Processing_Status result "%s".' % (accessionId, error_message)) 
     except:
         et, ev, tb = sys.exc_info()
         self.logger.error('got unexpected exception "%s"' % str(ev))
         self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
         self.sendMail('FAILURE YouTube Upload: reportFailure ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
Example 27
 def metadata_query_url(study_id):
     """Build ERMrest query URL returning metadata record needed by class."""
     return ('/attributegroup/'
             'NPS:=%(nps)s/ID=%(sid)s;RID=%(sid)s/'
             'IPS:=(NPS:Study)/'
             'N1:=(NPS:%(n1)s)/'
             'N2:=(NPS:%(n2)s)/'
             'I1:=(N1:%(si)s)/'
             '$NPS/'
             '*;'
             'I1:%(zs)s,'
             'IPS:Alignment,'
             'n1:=N1:%(sfu)s,'
             'n2:=N2:%(sfu)s,') % {
                 'sid': urlquote(study_id),
                 'nps': urlquote('Nucleic Pair Study'),
                 'n1': urlquote('Nucleic Region 1'),
                 'n2': urlquote('Nucleic Region 2'),
                 'si': urlquote('Source Image'),
                 'zs': urlquote('ZYX Spacing'),
                 'sfu': urlquote('Segments Filtered URL'),
             }
Example 28
 def reportFailure(self, accessionId, error_message):
     """
         Update the Slide_Video table with the YouTube Upload failure result.
     """
     try:
         columns = ["Processing_Status"]
         columns = ','.join([urlquote(col) for col in columns])
         url = '/attributegroup/Immunofluorescence:Slide_Video/Accession_ID;%s' % (
             columns)
         obj = {
             'Accession_ID': accessionId,
             "Processing_Status": '%s' % error_message
         }
         self.catalog.put(url, json=[obj])
         self.logger.debug(
             'SUCCEEDED updated the Slide_Video table for the video Accession_ID "%s"  with the Processing_Status result "%s".'
             % (accessionId, error_message))
     except:
         et, ev, tb = sys.exc_info()
         self.logger.error('got unexpected exception "%s"' % str(ev))
         self.logger.error('%s' %
                           str(traceback.format_exception(et, ev, tb)))
         self.sendMail('FAILURE YouTube Upload: reportFailure ERROR',
                       '%s\n' % str(traceback.format_exception(et, ev, tb)))
Example 29
 def deleteFromYouTube(self):
     url = '/entity/Common:Delete_Youtube/Youtube_Deleted=FALSE/Processing_Status=in%20progress;Processing_Status::null::' 
     resp = self.catalog.get(url)
     resp.raise_for_status()
     files = resp.json()
     fileids = []
     for f in files:
         fileids.append((f['YouTube_URI'], f['RID']))
             
     self.logger.debug('Deleting from YouTube %d videos(s).' % (len(fileids))) 
     for youtube_uri,rid in fileids:
         try:
             youtube_deleted = self.youtube_delete(youtube_uri)
             if youtube_deleted == True:
                 self.logger.debug('SUCCEEDED deleted from YouTube the video with the URL: "%s".' % (youtube_uri)) 
                 columns = ["Youtube_Deleted", "Processing_Status"]
                 columns = ','.join([urlquote(col) for col in columns])
                 url = '/attributegroup/Common:Delete_Youtube/RID;%s' % (columns)
                 obj = {'RID': rid,
                        'Youtube_Deleted': True,
                        'Processing_Status': 'success'
                        }
                 self.catalog.put(
                     url,
                     json=[obj]
                 )
                 self.logger.debug('SUCCEEDED updated the Common:Delete_Youtube table entry for the YouTube URL: "%s".' % (youtube_uri)) 
             else:
                 self.logger.debug('Failure in deleting from YouTube the video with the URL: "%s".' % (youtube_uri)) 
                 self.sendMail('FAILURE Delete YouTube: YouTube Failure', 'The video "%s" could not be deleted from Youtube.' % youtube_uri)
                 self.reportFailure(rid, 'YouTube Failure')
         except Exception as e:
             et, ev, tb = sys.exc_info()
             self.logger.error('got exception "%s"' % str(ev))
             self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
             self.reportFailure(rid, str(e))
Example 30
parser.add_option('-t', '--RMT', action='store', dest='RMT', type='string', help='Modification Timestamp')

(options, args) = parser.parse_args()

if not options.server:
    print ('ERROR: Missing host name')
    sys.exit()

if not options.credentials:
    print ('ERROR: Missing credentials file')
    sys.exit()

if not options.RMT:
    RMT = ''
else:
    RMT = '&RMT::geq::%s' % (urlquote(options.RMT))

"""
Get the non-NULL "Thumbnail" values from the "Scan" table.
"""
servername = options.server
credentialsfilename = options.credentials
catalog = 1
schema = 'Microscopy'
table = 'Scan'
column = 'Thumbnail'
prefix = '/var/www/html'
output = '%s_add_border.sh' % servername.split('.')[0]

credentials = json.load(open(credentialsfilename))
catalog = ErmrestCatalog('https', servername, catalog, credentials)
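
For reference, a tiny sketch of what the RMT filter built above looks like once percent-encoded (quote stands in for urlquote; the timestamp value is made up):

    from urllib.parse import quote

    RMT = '2021-06-01 10:00:00+00:00'   # hypothetical timestamp value
    print('&RMT::geq::%s' % quote(RMT, safe=''))
    # &RMT::geq::2021-06-01%2010%3A00%3A00%2B00%3A00
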
Example 31
 def _post_parser_init(self, args):
     """Shared initialization for all sub-commands.
     """
     self.host = args.host if args.host else 'localhost'
     self.resource = urlquote(args.resource, '/')
     self.store = HatracStore('https', args.host, DerivaHatracCLI._get_credential(self.host, args.token))
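
A quick illustration of the safe argument used here: passing '/' keeps path separators intact while other reserved characters are still escaped (quote stands in for urlquote; the path is hypothetical):

    from urllib.parse import quote

    resource = '/hatrac/study data/file 1.tiff'   # hypothetical Hatrac path
    print(quote(resource, safe='/'))  # /hatrac/study%20data/file%201.tiff
    print(quote(resource, safe=''))   # %2Fhatrac%2Fstudy%20data%2Ffile%201.tiff
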
Example 32
    def uploadVideo(self):
        url = '/entity/Immunofluorescence:Slide_Video/!Identifier::null::&!Name::null::&!Bytes::null::&Media_Type=video%2Fmp4/Processing_Status=in%20progress;Processing_Status::null::'
        resp = self.catalog.get(url)
        resp.raise_for_status()
        videos = resp.json()
        videoids = []
        for video in videos:
            videoids.append(
                (video['Accession_ID'], video['Name'], video['Title'],
                 video['Description'], video['Identifier'], video['MD5'],
                 video['YouTube_MD5'], video['YouTube_URI'], video['RID'],
                 video['Consortium'], video['MP4_URI'], video['RCT'],
                 video['RMT']))

        self.logger.debug('Processing %d video(s).' % (len(videoids)))
        for accessionId, fileName, title, description, uri, md5, youtube_md5, youtube_uri, rid, consortium, mp4_uri, rct, rmt in videoids:
            if description == None:
                description = ''
            consortium_url = ''
            if consortium == 'GUD':
                consortium_url = 'gudmap.org'
            elif consortium == 'RBK':
                consortium_url = 'rebuildingakidney.org'
            f, MP4_URI = self.getVideoFile(fileName, uri, consortium_url, md5,
                                           accessionId)
            if f == None or MP4_URI == None:
                self.reportFailure(accessionId, 'error_no_video_file')
                continue

            if youtube_uri != None and youtube_md5 != md5:
                """
                We have an update.
                Mark the video to be deleted from YouTube
                """
                url = '/entity/Common:Delete_Youtube?defaults=RID,RCT,RMT'
                obj = {
                    'YouTube_MD5': youtube_md5,
                    'YouTube_URI': youtube_uri,
                    'Record_Type': 'Immunofluorescence:Slide_Video',
                    'Record_RID': rid,
                    'Youtube_Deleted': False
                }
                try:
                    r = self.catalog.post(url, json=[obj])
                    r.raise_for_status()
                    self.logger.debug(
                        'SUCCEEDED updated the Common:Delete_Youtube table entry for the YouTube URL: "%s".'
                        % (youtube_uri))
                except:
                    et, ev, tb = sys.exc_info()
                    self.logger.error(
                        '%s' % str(traceback.format_exception(et, ev, tb)))
                    self.sendMail(
                        'FAILURE YouTube Upload: Delete_Youtube ERROR',
                        '%s\n' % str(traceback.format_exception(et, ev, tb)))

                if mp4_uri != None:
                    """
                    We have an update.
                    Insert the old video into the Delete_Hatrac table
                    """

                    self.logger.debug(
                        'Inserting the old MP4 video "%s" file into the Delete_Hatrac table.'
                        % (fileName))
                    url = '/entity/Common:Delete_Hatrac?defaults=RID,RCT,RMT'
                    obj = {
                        'Hatrac_MD5': mp4_uri.split('/')[-1],
                        'Hatrac_URI': mp4_uri,
                        'Hatrac_Deleted': False,
                        'Record_Type': 'Immunofluorescence:Slide_Video',
                        'Record_RID': rid,
                        'Record_RCT': rct,
                        'Record_RMT': rmt,
                        'Record_Deleted': False
                    }
                    try:
                        r = self.catalog.post(url, json=[obj])
                        r.raise_for_status()
                        self.logger.debug(
                            'SUCCEEDED updated the Common:Delete_Hatrac table entry for the Hatrac_URI: "%s".'
                            % (mp4_uri))
                    except:
                        et, ev, tb = sys.exc_info()
                        self.logger.error(
                            '%s' % str(traceback.format_exception(et, ev, tb)))
                        self.sendMail(
                            'FAILURE YouTube Upload: Delete_Hatrac ERROR',
                            '%s\n' %
                            str(traceback.format_exception(et, ev, tb)))

            self.logger.debug('Uploading the video "%s" to YouTube' %
                              (fileName))
            """
            Get the video properties
            """
            cfg = self.getVideoProperties(f)
            if cfg != None:
                width, height = self.getVideoResolution(cfg)
                self.logger.debug('Video resolution: (%d x %d).' %
                                  (width, height))
            else:
                self.logger.debug('Could not get the video resolution.')
            """
            Initialize YouTube video parameters
            """
            self.args.file = f
            self.args.title = ('%s:\n%s' % (consortium_url, title))[:64]
            self.args.description = description
            """
            Upload video to YouTube
            """
            try:
                request = self.youtube_request()
                if request is not None:
                    id = self.youtube_upload(request)
                    returncode = 0
                else:
                    returncode = 1
            except:
                et, ev, tb = sys.exc_info()
                self.logger.error('got unexpected exception "%s"' % str(ev))
                self.logger.error('%s' %
                                  str(traceback.format_exception(et, ev, tb)))
                self.sendMail(
                    'FAILURE YouTube Upload: YouTube ERROR',
                    '%s\n' % str(traceback.format_exception(et, ev, tb)))
                returncode = 1

            if returncode != 0:
                self.logger.error('Can not upload to YouTube the "%s" file.' %
                                  (fileName))
                self.sendMail(
                    'FAILURE YouTube Upload',
                    'Can not upload to YouTube the "%s" file.' % (fileName))
                os.remove(f)
                """
                Update the Slide_Video table with the failure result.
                """
                self.reportFailure(accessionId, 'error_youtube_upload')
                continue
            """
            Upload the Slide_Video table with the SUCCESS status
            """
            columns = [
                "MP4_URI", "YouTube_MD5", "YouTube_URI", "Processing_Status"
            ]
            #youtube_uri = "https://www.youtube.com/embed/%s?showinfo=0&rel=0" % id
            youtube_uri = "https://www.youtube.com/embed/%s?rel=0" % id
            os.remove(f)
            columns = ','.join([urlquote(col) for col in columns])
            url = '/attributegroup/Immunofluorescence:Slide_Video/Accession_ID;%s' % (
                columns)
            obj = {
                'Accession_ID': accessionId,
                'MP4_URI': MP4_URI,
                'YouTube_URI': youtube_uri,
                'YouTube_MD5': md5,
                'Processing_Status': 'success'
            }
            try:
                r = self.catalog.put(url, json=[obj])
                r.raise_for_status()
                self.logger.debug(
                    'SUCCEEDED updated the Immunofluorescence:Slide_Video table entry for the file: "%s".'
                    % (fileName))
            except:
                et, ev, tb = sys.exc_info()
                self.logger.error('%s' %
                                  str(traceback.format_exception(et, ev, tb)))
                self.sendMail(
                    'FAILURE YouTube Upload: Delete_Hatrac ERROR',
                    '%s\n' % str(traceback.format_exception(et, ev, tb)))
            self.logger.debug(
                'SUCCEEDED updated the entry for the "%s" file.' % (fileName))

        self.logger.debug('Ended uploading videos to YouTube.')
Example 33
    def register_release(self, id, dcc_datapackages, description=None):
        """Idempotently register new release in registry, returning (release row, dcc_datapackages).

        :param id: The release.id for the new record
        :param dcc_datapackages: A dict mapping {dcc_id: datapackage, ...} for constituents
        :param description: A human-readable description of this release

        The constituents are a set of datapackage records (dicts) as
        returned by the get_datapackage() method. The dcc_id key MUST
        match the submitting_dcc of the record.

        For repeat calls on existing releases, the definition will be
        updated if the release is still in the planning state, but a
        StateError will be raised if it is no longer in planning state.

        """
        for dcc_id, dp in dcc_datapackages.items():
            if dcc_id != dp['submitting_dcc']:
                raise ValueError(
                    'Mismatch in dcc_datapackages DCC IDs %s != %s' %
                    (dcc_id, dp['submitting_dcc']))

        try:
            rel, old_dcc_dps = self.get_release(id)
        except exception.ReleaseUnknown:
            # create new release record
            newrow = {
                'id': id,
                'status': terms.cfde_registry_rel_status.planning,
                'description':
                None if description is nochange else description,
            }
            defaults = [
                cname for cname in
                self._builder.CFDE.release.column_definitions.keys()
                if cname not in newrow
            ]
            logger.info('Registering new release %s' % (id, ))
            self._catalog.post('/entity/CFDE:release?defaults=%s' %
                               (','.join(defaults), ),
                               json=[newrow])
            rel, old_dcc_dps = self.get_release(id)

        if rel['status'] != terms.cfde_registry_rel_status.planning:
            raise exception.StateError(
                'Idempotent registration disallowed on existing release %(id)s with status=%(status)s'
                % rel)

        # prepare for idempotent updates
        old_dp_ids = {dp['id'] for dp in old_dcc_dps.values()}
        dp_ids = {dp['id'] for dp in dcc_datapackages.values()}
        datapackages = {dp['id']: dp for dp in dcc_datapackages.values()}

        # idempotently revise description
        if rel['description'] != description:
            logger.info('Updating release %s description: %s' % (
                id,
                description,
            ))
            self.update_release(id, description=description)

        # find currently registered constituents
        path = self._builder.CFDE.dcc_release_datapackage.path
        path = path.filter(path.dcc_release_datapackage.release == id)
        old_dp_ids = {row['datapackage'] for row in path.entities().fetch()}

        # remove stale consituents
        for dp_id in old_dp_ids.difference(dp_ids):
            logger.info('Removing constituent datapackage %s from release %s' %
                        (dp_id, id))
            self._catalog.delete(
                '/entity/CFDE:dcc_release_datapackage/release=%s&datapackage=%s'
                % (
                    urlquote(id),
                    urlquote(dp_id),
                ))

        # add new consituents
        new_dp_ids = dp_ids.difference(old_dp_ids)
        if new_dp_ids:
            logger.info('Adding constituent datapackages %s to release %s' %
                        (new_dp_ids, id))
            self._catalog.post('/entity/CFDE:dcc_release_datapackage',
                               json=[{
                                   'dcc':
                                   datapackages[dp_id]['submitting_dcc'],
                                   'release':
                                   id,
                                   'datapackage':
                                   dp_id,
                               } for dp_id in new_dp_ids])

        # return registry content
        return self.get_release(id)
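
One small pattern worth noting in the snippet above: columns absent from the new row are listed in ?defaults= so the server supplies their values. A toy sketch (the row and column list below are made up for illustration):

    # Columns not supplied in the new row are named in ?defaults= so the
    # server fills them in on insert.
    newrow = {'id': 'rel-1', 'status': 'planning', 'description': None}
    all_columns = ['id', 'status', 'description', 'RID', 'RCT', 'RMT']
    defaults = [c for c in all_columns if c not in newrow]
    print('/entity/CFDE:release?defaults=%s' % ','.join(defaults))
    # /entity/CFDE:release?defaults=RID,RCT,RMT
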
Example 34
 def uploadVideo(self):
     url = '/entity/Immunofluorescence:Slide_Video/!Identifier::null::&!Name::null::&!Bytes::null::&Media_Type=video%2Fmp4/Processing_Status=in%20progress;Processing_Status::null::' 
     resp = self.catalog.get(url)
     resp.raise_for_status()
     videos = resp.json()
     videoids = []
     for video in videos:
         videoids.append((video['Accession_ID'], video['Name'], video['Title'], video['Description'], video['Identifier'], video['MD5'], video['YouTube_MD5'], video['YouTube_URI'], video['RID'], video['Consortium'], video['MP4_URI'], video['RCT'], video['RMT']))
             
     self.logger.debug('Processing %d video(s).' % (len(videoids))) 
     for accessionId,fileName,title,description,uri,md5,youtube_md5,youtube_uri,rid,consortium,mp4_uri,rct,rmt in videoids:
         if description == None:
             description = ''
         consortium_url = ''
         if consortium == 'GUD':
             consortium_url = 'gudmap.org'
         elif consortium == 'RBK':
             consortium_url = 'rebuildingakidney.org'
         f, MP4_URI= self.getVideoFile(fileName, uri, consortium_url, md5, accessionId)
         if f == None or MP4_URI == None:
             self.reportFailure(accessionId, 'error_no_video_file')
             continue
             
         if youtube_uri != None and youtube_md5 != md5:
             """
             We have an update.
             Mark the video to be deleted from YouTube
             """
             url = '/entity/Common:Delete_Youtube?defaults=RID,RCT,RMT'
             obj = {'YouTube_MD5': youtube_md5,
                    'YouTube_URI': youtube_uri,
                    'Record_Type': 'Immunofluorescence:Slide_Video',
                    'Record_RID': rid,
                    'Youtube_Deleted': False
                    }
             try:
                 r = self.catalog.post(
                     url,
                     json=[obj]
                 )
                 r.raise_for_status()
                 self.logger.debug('SUCCEEDED updated the Common:Delete_Youtube table entry for the YouTube URL: "%s".' % (youtube_uri)) 
             except:
                 et, ev, tb = sys.exc_info()
                 self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
                 self.sendMail('FAILURE YouTube Upload: Delete_Youtube ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
             
             if mp4_uri != None:
                 """
                 We have an update.
                 Insert the old video into the Delete_Hatrac table
                 """
                 
                 self.logger.debug('Inserting the old MP4 video "%s" file into the Delete_Hatrac table.' % (fileName))
                 url = '/entity/Common:Delete_Hatrac?defaults=RID,RCT,RMT'
                 obj = {'Hatrac_MD5': mp4_uri.split('/')[-1],
                        'Hatrac_URI': mp4_uri,
                        'Hatrac_Deleted': False,
                        'Record_Type': 'Immunofluorescence:Slide_Video',
                        'Record_RID': rid,
                        'Record_RCT': rct,
                        'Record_RMT': rmt,
                        'Record_Deleted': False
                        }
                 try:
                     r = self.catalog.post(
                         url,
                         json=[obj]
                     )
                     r.raise_for_status()
                     self.logger.debug('SUCCEEDED updated the Common:Delete_Hatrac table entry for the Hatrac_URI: "%s".' % (mp4_uri)) 
                 except:
                     et, ev, tb = sys.exc_info()
                     self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
                     self.sendMail('FAILURE YouTube Upload: Delete_Hatrac ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
         
         self.logger.debug('Uploading the video "%s" to YouTube' % (fileName))
         
         """
         Get the video properties
         """
         cfg = self.getVideoProperties(f)
         if cfg != None:
             width,height = self.getVideoResolution(cfg)
             self.logger.debug('Video resolution: (%d x %d).' % (width, height)) 
         else:
             self.logger.debug('Could not get the video resolution.') 
             
         """
         Initialize YouTube video parameters
         """
         self.args.file = f
         self.args.title = ('%s:\n%s' % (consortium_url, title))[:64]
         self.args.description = description
         
         """
         Upload video to YouTube
         """
         try:
             request = self.youtube_request()
             if request is not None:
                 id = self.youtube_upload(request)
                 returncode = 0
             else:
                 returncode = 1
         except:
             et, ev, tb = sys.exc_info()
             self.logger.error('got unexpected exception "%s"' % str(ev))
             self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
             self.sendMail('FAILURE YouTube Upload: YouTube ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
             returncode = 1
         
         if returncode != 0:
             self.logger.error('Can not upload to YouTube the "%s" file.' % (fileName)) 
             self.sendMail('FAILURE YouTube Upload', 'Can not upload to YouTube the "%s" file.' % (fileName))
             os.remove(f)
             """
             Update the Slide_Video table with the failure result.
             """
             self.reportFailure(accessionId, 'error_youtube_upload')
             continue
             
         """
         Upload the Slide_Video table with the SUCCESS status
         """
         columns = ["MP4_URI", "YouTube_MD5", "YouTube_URI", "Processing_Status"]
         #youtube_uri = "https://www.youtube.com/embed/%s?showinfo=0&rel=0" % id
         youtube_uri = "https://www.youtube.com/embed/%s?rel=0" % id
         os.remove(f)
         columns = ','.join([urlquote(col) for col in columns])
         url = '/attributegroup/Immunofluorescence:Slide_Video/Accession_ID;%s' % (columns)
         obj = {'Accession_ID': accessionId,
                'MP4_URI': MP4_URI,
                'YouTube_URI': youtube_uri,
                'YouTube_MD5': md5,
                'Processing_Status': 'success'
                }
         try:
             r = self.catalog.put(
                 url,
                 json=[obj]
             )
             r.raise_for_status()
             self.logger.debug('SUCCEEDED updated the Immunofluorescence:Slide_Video table entry for the file: "%s".' % (fileName)) 
         except:
             et, ev, tb = sys.exc_info()
             self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
             self.sendMail('FAILURE YouTube Upload: Delete_Hatrac ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
         self.logger.debug('SUCCEEDED updated the entry for the "%s" file.' % (fileName)) 
         
     self.logger.debug('Ended uploading videos to YouTube.') 
Example 35
    def processHistologicalImages(self):
        """
        Query for detecting new slides - the most recently first
        """
        url = '/entity/Histological_Images:HE_Slide/!File_Bytes::null::&Pyramid_URL::null::/Processing_Status=in%%20progress;Processing_Status::null::@sort(%s::desc::)' % (
            urlquote('RCT'))

        resp = self.catalog.get(url)
        resp.raise_for_status()
        slides = resp.json()
        slideids = []
        for slide in slides:
            slideids.append(
                (slide['ID'], slide['Filename'], slide['File_URL'],
                 slide['RCT'], slide['File_MD5'], slide['Name'], slide['RID']))

        self.logger.debug('Processing %d HistologicalImages slides(s).' %
                          (len(slideids)))

        for slideId, filename, file_url, creation_time, md5, name, rid in slideids:
            self.logger.debug('Generating pyramidal tiles for the file "%s"' %
                              (filename))
            """
            Extract the file from hatrac
            """
            f = self.getHatracFile(filename, file_url)

            if f == None:
                continue
            """
            Create the directory for the tiles
            """
            year = parse(creation_time).strftime("%Y")
            outdir = '%s/%s/%s' % (self.dzi, year, md5)
            if not os.path.exists(outdir):
                os.makedirs(outdir)
            """
            Convert the file to DZI
            """
            returncode = self.convert2dzi(f, outdir)

            if returncode != 0:
                """
                Update the slide table with the failure result.
                """
                self.updateAttributes(
                    'Histological_Images:HE_Slide', rid,
                    ["Thumbnail", "Processing_Status"], {
                        'RID': rid,
                        'Thumbnail': '/thumbnails/generic/generic_genetic.png',
                        'Processing_Status': 'czi2dzi error'
                    })
                continue
            """
            Generate the thumbnail
            """
            thumbnail, urls = self.writeThumbnailImage(f, year, md5)

            if thumbnail is None:
                """
                Update the slide table with the failure result.
                """
                self.updateAttributes(
                    'Histological_Images:HE_Slide', rid,
                    ["Thumbnail", "Processing_Status"], {
                        'RID': rid,
                        'Thumbnail': '/thumbnails/generic/generic_genetic.png',
                        'Processing_Status': 'DZI failure'
                    })
                continue
            """
            Extract the metadata
            """
            self.logger.debug('Extracting metadata for filename "%s"' %
                              (filename))
            bioformatsClient = BioformatsClient(showinf=self.showinf,
                                                czirules=self.czirules,
                                                cziFile=f,
                                                logger=self.logger)
            try:
                metadata = bioformatsClient.getMetadata()
                if metadata is None:
                    metadata = {}
                self.logger.debug('Metadata: "%s"' % str(metadata))
                os.remove('temp.xml')
            except XMLSyntaxError:
                et, ev, tb = sys.exc_info()
                self.logger.error('got unexpected exception "%s"' % str(ev))
                self.logger.error('%s' %
                                  str(traceback.format_exception(et, ev, tb)))
                self.sendMail(
                    'FAILURE Tiles: XMLSyntaxError',
                    '%s\n' % str(traceback.format_exception(et, ev, tb)))
                metadata = {}

            os.remove(f)
            """
            Update the slide table with the success result.
            """
            self.updateAttributes(
                'Histological_Images:HE_Slide', rid,
                ["Thumbnail", "Pyramid_URL", "Processing_Status", "uri"], {
                    'RID': rid,
                    'Thumbnail': thumbnail,
                    'Pyramid_URL': '/%s?%s' % (self.viewer, urls),
                    'uri': '/%s?%s' % (self.viewer, urls),
                    "Processing_Status": 'success'
                })

            self.logger.debug(
                'SUCCEEDED created the tiles directory for the file "%s".' %
                (filename))
            """
            Update/Create the image entry with the metadata
            """
            obj = {}
            obj['ID'] = slideId
            obj['Name'] = name
            obj['url'] = '/chaise/viewer/#2/Histological_Images:HE_Slide/ID=%d' % slideId
            columns = ['ID', 'Name', 'url']
            for col in self.metadata:
                if col in metadata and metadata[col] is not None:
                    columns.append(col)
                    obj[col] = metadata[col]
            """
            Check if we have an update or create
            """
            rid = self.getRID('Histological_Images:HE_Image',
                              'ID=%d' % slideId)
            if rid is not None:
                obj['RID'] = rid
                self.updateAttributes('Histological_Images:HE_Image', rid,
                                      columns, obj)
            else:
                self.createEntity('Histological_Images:HE_Image', obj)

            self.logger.debug(
                'SUCCEEDED created the image entry for the file "%s".' %
                (filename))

        self.logger.debug('Ended HistologicalImages Slides Processing.')
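
The helpers called above (getHatracFile, convert2dzi, writeThumbnailImage, updateAttributes, getRID, createEntity) are defined elsewhere and are not part of this snippet. A hedged sketch of what updateAttributes might look like, assuming it wraps the same attributegroup PUT pattern seen in the other examples; this is an illustration, not the project's actual implementation:

    def updateAttributes(self, schema_table, rid, columns, obj):
        # Sketch: update selected columns of one row, addressed by RID,
        # via the attributegroup API (same pattern as the other examples).
        cols = ','.join([urlquote(col) for col in columns])
        url = '/attributegroup/%s/RID;%s' % (schema_table, cols)
        resp = self.catalog.put(url, json=[obj])
        resp.raise_for_status()
        self.logger.debug('Updated %s row RID=%s.' % (schema_table, rid))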
Example n. 36
                  help='Modification Timestamp')

(options, args) = parser.parse_args()

if not options.server:
    print('ERROR: Missing host name')
    sys.exit(1)

if not options.credentials:
    print('ERROR: Missing credentials file')
    sys.exit(1)

if not options.RMT:
    RMT = ''
else:
    RMT = '&RMT::geq::%s' % (urlquote(options.RMT))
"""
Get the non NULL "Thumbnail" values from the "Scan" table.
"""
servername = options.server
credentialsfilename = options.credentials
catalog = 1
schema = 'Microscopy'
table = 'Scan'
column = 'Thumbnail'
prefix = '/var/www/html'
output = '%s_add_border.sh' % servername.split('.')[0]

credentials = json.load(open(credentialsfilename))
catalog = ErmrestCatalog('https', servername, catalog, credentials)
table = 'Scan'
acquisition = 'Acquisition Date'
czi = 'HTTP URL'
rid = 'RID'
rct = 'RCT'
filename = 'filename'

hatrac_store = HatracStore(
    'https', 
    servername,
    {'cookie': credentials['cookie']}
)
url = '/attribute/%s:%s/%s::null::/%s,%s,%s,%s' % (urlquote(schema), urlquote(table), urlquote(acquisition), urlquote(rid), urlquote(rct), urlquote(filename), urlquote(czi))
print('Query URL: "%s"' % url)

resp = catalog.get(url)
resp.raise_for_status()
rows = resp.json()

entities = []
for row in rows:
    if options.skip:
        acquisitionDate = row[rct][:10]
    else:
        acquisitionDate = getAcquisitionDate(row)
    entities.append({rid: row[rid], acquisition: acquisitionDate})
    
print('Total rows to be updated: %d' % len(entities))
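
# Hedged sketch, not part of the original snippet: one plausible final step,
# assuming the collected acquisition dates are written back to the Scan table
# with the same attributegroup pattern used in the other examples. The batch
# size is an arbitrary choice.
update_url = '/attributegroup/%s:%s/%s;%s' % (
    urlquote(schema), urlquote(table), urlquote(rid), urlquote(acquisition))
batch_size = 1000
for i in range(0, len(entities), batch_size):
    resp = catalog.put(update_url, json=entities[i:i + batch_size])
    resp.raise_for_status()
print('Updated %d row(s).' % len(entities))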
Example n. 39
    def __init__(self, *args, **kwargs):
        DerivaDownload.__init__(self, *args, **kwargs)

        self.config_file = kwargs.get("config_file")
        self.annotation_config = None

        if not self.config:
            self.config = copy.deepcopy(self.BASE_CONFIG)

            no_schema = kwargs.get("no_schema", False)
            if not no_schema:
                self.config["catalog"]["query_processors"].append(self.BASE_SCHEMA_QUERY_PROC)

            no_bag = kwargs.get("no_bag", False)
            if not no_bag:
                bag = dict()
                bag["bag_name"] = os.path.basename(self.output_dir)
                bag["bag_archiver"] = kwargs.get("bag_archiver", "tgz")
                bag["bag_algorithms"] = ["sha256", "md5"]
                self.config["bag"] = bag

            # if credentials have not been explicitly set yet, try to get them from the default credential store
            if not self.credentials:
                self.set_credentials(get_credential(self.hostname))

            logging.debug("Inspecting catalog model...")
            model = self.catalog.getCatalogModel()
            # if we don't have catalog ownership rights, it's a hard error for now
            if not model.acls:
                raise DerivaBackupAuthorizationError("Only catalog owners may perform full catalog dumps.")

            if kwargs.get("no_data", False):
                return

            exclude = kwargs.get("exclude_data", list())
            for sname, schema in model.schemas.items():
                if sname in exclude:
                    logging.info("Excluding data dump from all tables in schema: %s" % sname)
                    continue
                for tname, table in schema.tables.items():
                    fqtname = "%s:%s" % (sname, tname)
                    if table.kind != "table":
                        logging.warning("Skipping data dump of %s: %s" % (table.kind, fqtname))
                        continue
                    if fqtname in exclude:
                        logging.info("Excluding data dump from table: %s" % fqtname)
                        continue
                    if "RID" not in table.column_definitions.elements:
                        logging.warning(
                            "Source table %s.%s lacks system-columns and will not be dumped." % (sname, tname))

                    # Configure table data download query processors
                    data_format = "json" if (sname, tname) in {
                        ('public', 'ERMrest_Client'),
                        ('public', 'ERMrest_Group'),
                    } else "json-stream"
                    q_sname = urlquote(sname)
                    q_tname = urlquote(tname)
                    output_path = self.BASE_DATA_OUTPUT_PATH.format(q_sname, q_tname)
                    query_path = self.BASE_DATA_QUERY_PATH.format(q_sname, q_tname)
                    query_proc = dict()
                    query_proc["processor"] = data_format
                    query_proc["processor_params"] = {"query_path": query_path, "output_path": output_path}
                    self.config["catalog"]["query_processors"].append(query_proc)

        self.generate_asset_configs()
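
The class attributes referenced above (BASE_CONFIG, BASE_SCHEMA_QUERY_PROC, BASE_DATA_QUERY_PATH, BASE_DATA_OUTPUT_PATH) are defined elsewhere in the class and are not part of the snippet. A hedged sketch of plausible values, consistent only with how the code above formats and consumes them; the exact strings are assumptions, not the project's definitions:

    # Illustrative stand-ins for the class attributes assumed by the snippet above.
    BASE_CONFIG = {
        "catalog": {
            "query_processors": []
        }
    }
    BASE_SCHEMA_QUERY_PROC = {
        "processor": "json",
        "processor_params": {"query_path": "/schema", "output_path": "catalog-schema"}
    }
    BASE_DATA_QUERY_PATH = "/entity/{}:{}"    # formatted with the url-quoted schema and table
    BASE_DATA_OUTPUT_PATH = "records/{}/{}"   # relative output path for the dumped rows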
Example n. 40
 def metadata_query_url(study_id):
     """Build ERMrest query URL returning metadata record needed by class."""
     return ('/attributegroup/'
             'SPS:=%(sps)s/ID=%(sid)s;RID=%(sid)s/'
             'IPS:=%(ips)s/'
             'S1:=(SPS:%(s1)s)/'
             'S2:=(SPS:%(s2)s)/'
             'N1:=(IPS:%(n1)s)/'
             'N2:=(IPS:%(n2)s)/'
             'I1:=(N1:%(si)s)/'
             '$SPS/'
             '*;'
             'I1:%(zs)s,'
             'IPS:Alignment,'
             'n1:=IPS:%(r1u)s,'
             'n2:=IPS:%(r2u)s,'
             's1:=SPS:%(r1u)s,'
             's2:=SPS:%(r2u)s,'
             's1raw:=S1:%(sfu)s,'
             's2raw:=S2:%(sfu)s,'
             's1box:=S1:%(slice)s,'
             's2box:=S2:%(slice)s,'
             's1n:=S1:%(nu)s,'
             's2n:=S2:%(nu)s') % {
                 'sid': urlquote(study_id),
                 'sps': urlquote('Synaptic Pair Study'),
                 'ips': urlquote('Image Pair Study'),
                 'sfu': urlquote('Segments Filtered URL'),
                 's1': urlquote('Synaptic Region 1'),
                 's2': urlquote('Synaptic Region 2'),
                 'n1': urlquote('Nucleic Region 1'),
                 'n2': urlquote('Nucleic Region 2'),
                 'si': urlquote('Source Image'),
                 'zs': urlquote('ZYX Spacing'),
                 'r1u': urlquote('Region 1 URL'),
                 'r2u': urlquote('Region 2 URL'),
                 'slice': urlquote('ZYX Slice'),
                 'nu': urlquote('Npz URL'),
             }
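
A short usage example for the function above, assuming an ErmrestCatalog handle named catalog and an illustrative study identifier; the returned list should contain the metadata row for the matched Synaptic Pair Study record:

    # Hypothetical usage (the study identifier and catalog handle are illustrative).
    url = metadata_query_url('1-ABCD')
    resp = catalog.get(url)
    resp.raise_for_status()
    rows = resp.json()
    record = rows[0] if rows else None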