Example #1
 def __init__(self, **kwargs):
     self.metadata = kwargs.get("metadata")
     self.baseuri = kwargs.get("baseuri")
     o = urlparse.urlparse(self.baseuri)
     self.scheme = o[0]
     host_port = o[1].split(":")
     self.host = host_port[0]
     self.path = o.path
     self.port = None
     if len(host_port) > 1:
         self.port = host_port[1]
     self.dzi = kwargs.get("dzi")
     self.thumbnails = kwargs.get("thumbnails")
     self.czi2dzi = kwargs.get("czi2dzi")
     self.viewer = kwargs.get("viewer")
     self.czirules = kwargs.get("czirules")
     self.showinf = kwargs.get("showinf")
     self.data_scratch = kwargs.get("data_scratch")
     self.cookie = kwargs.get("cookie")
     self.store = HatracStore(self.scheme, self.host,
                              {'cookie': self.cookie})
     self.catalog = PollingErmrestCatalog(self.scheme, self.host,
                                          self.path.split('/')[-1],
                                          {'cookie': self.cookie})
     self.mail_server = kwargs.get("mail_server")
     self.mail_sender = kwargs.get("mail_sender")
     self.mail_receiver = kwargs.get("mail_receiver")
     self.logger = kwargs.get("logger")
     self.logger.debug('Client initialized.')
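For reference, here is the base-URI decomposition above as a standalone sketch; the URI is a made-up example, and in Python 3 the urlparse module used by these excerpts lives at urllib.parse:

from urllib import parse as urlparse

baseuri = 'https://synapse.example.org:8443/ermrest/catalog/1'  # illustrative
o = urlparse.urlparse(baseuri)
scheme = o[0]                        # 'https'
host_port = o[1].split(':')          # ['synapse.example.org', '8443']
host = host_port[0]
port = host_port[1] if len(host_port) > 1 else None
catalog_id = o.path.split('/')[-1]   # '1', the id handed to PollingErmrestCatalog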
Example #2
def add_file_to_cohort(file, description, cohort):
    """
    Upload a file into a data collection and add that file into the set of files associated with a cohort analysis.
    :param file: local path to the file that should be uploaded and associated with the cohort
    :param description: Text that is used to describe the file that is being uploaded
    :param cohort: RID of the analysis cohort with which the file should be associated.
    :return: None.
    """
    credential = get_credential(synapseserver)
    store = HatracStore('https', synapseserver, credentials=credential)
    catalog = ErmrestCatalog('https', synapseserver, 1, credentials=credential)

    pb = catalog.getPathBuilder()
    zebrafish = pb.Zebrafish
    synapse = pb.Synapse

    collection = synapse.tables['Collection']
    # Insert a placeholder record; the URL is patched after the upload completes below.
    files = collection.insert([{'Description': description, 'URL': 'dummy2'}])
    newfileRID = files[0]['RID']
    print('inserted file into collection {}'.format(newfileRID))
    path = '/hatrac/Data/Data_{0}_{1}'.format(newfileRID, os.path.basename(file))
    loc = store.put_obj(path, file)
    files[0]['URL'] = loc
    files[0]['Orig. Basename'] = os.path.basename(file)

    r = store.head(path)
    files[0]['MD5'] = r.headers['content-md5']
    files[0]['#Bytes'] = r.headers['Content-Length']
    files = collection.update(files)

    # Now link into cohort.
    collection_table = zebrafish.tables['Cohort Analysis_Collection']
    collection_table.insert([{'Cohort Analysis': cohort, 'Collection': newfileRID}])
    return
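A hypothetical invocation of add_file_to_cohort; the file path, description, and cohort RID below are placeholders, and synapseserver is assumed to be a module-level host name as in the function body:

add_file_to_cohort(
    file='/data/analysis/pairs-dump.pkl',              # local file to upload
    description='Synapse pair dump for study 1-X1Y2',
    cohort='1-X1Y2',                                   # RID of an existing Cohort Analysis record
)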
Example #3
    def configure(self, config_path):
        # configure logging
        self.ui.logTextBrowser.widget.log_update_signal.connect(self.updateLog)
        self.ui.logTextBrowser.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
        logging.getLogger().addHandler(self.ui.logTextBrowser)
        logging.getLogger().setLevel(logging.INFO)

        # configure Ermrest/Hatrac
        if not config_path:
            config_path = os.path.join(os.path.expanduser(
                os.path.normpath("~/.deriva/synapse/synspy-launcher")), "config.json")
        self.config_path = config_path
        config = read_config(self.config_path, create_default=True, default=DEFAULT_CONFIG)
        protocol = config["server"]["protocol"]
        self.server = config["server"]["host"]
        catalog_id = config["server"]["catalog_id"]
        session_config = config.get("session")
        self.catalog = ErmrestCatalog(protocol, self.server, catalog_id, self.credential, session_config=session_config)
        self.store = HatracStore(protocol, self.server, self.credential, session_config=session_config)

        # create working dir (tempdir)
        self.tempdir = tempfile.mkdtemp(prefix="synspy_")

        # determine viewer mode
        self.use_3D_viewer = config.get("viewer_mode", "2d").lower() == "3d"

        # curator mode?
        curator_mode = config.get("curator_mode")
        if not curator_mode:
            config["curator_mode"] = False
        self.curator_mode = config.get("curator_mode")

        # save config
        self.config = config
        write_config(self.config_path, self.config)
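For orientation, a config.json shape consistent with the keys this method reads; the values are illustrative, not a documented default:

EXAMPLE_CONFIG = {
    "server": {
        "protocol": "https",
        "host": "synapse.example.org",   # placeholder
        "catalog_id": "1",
        "session": None,                 # optional session tuning passed through
    },
    "viewer_mode": "2d",                 # "3d" selects the 3D viewer
    "curator_mode": False,
}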
Example #4
def add_file_to_replicant(dataset_rid, fmap, description=''):
    """
    Upload a file into a data collection and add that file into the set of files associated with a cohort analysis.
    :param file: local path to the file that should be uploaded and associated with the cohort
    :param description: Text that is used to describe the file that is being uploaded
    :param cohort: RID of the analysis cohort to which the file file should be assoicated.
    :return: None.
    """
    credential = get_credential(pbcserver)
    store = HatracStore('https', pbcserver, credentials=credential)
    catalog = ErmrestCatalog('https', pbcserver, 1, credentials=credential)

    (experiment_rid, biosample_rid, replicate_rid, filename) = fmap
    dirname = re.sub('_[0-9]+_pre_rec$', '', filename)
    filename = filename + '.mrc'
    path = '{}/{}'.format(dirname, filename)
    print('Uploading ', path)
    objpath = '/hatrac/commons/data/{}/{}/{}?parents=true'.format(
        dataset_rid, replicate_rid, os.path.basename(filename))
    print('to ', objpath)
    loc = store.put_obj(objpath, path)
    print(loc)
    r = store.head(objpath)
    md5 = r.headers['content-md5']
    byte_count = r.headers['Content-Length']
    submit_time = r.headers['Date']

    file = {
        'dataset': dataset_rid,
        'anatomy': pancreas,
        'device': xray_tomography,
        'equipment_model': 'commons:600:',
        'description': description,
        'url': loc,
        'filename': os.path.basename(filename),
        'file_type': 'commons:601:',
        'byte_count': byte_count,
        'submitted_on': submit_time,
        'md5': md5,
        'replicate': replicate_rid
    }
    print(file)

    pb = catalog.getPathBuilder()
    isa = pb.isa

    tomography_data = isa.tables['xray_tomography_data']
    try:
        newrid = tomography_data.insert([file])
    except Exception:
        # insert failed (likely because the record already exists), so update instead
        newrid = tomography_data.update([file])
    return
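A hypothetical call to add_file_to_replicant; all RIDs and the filename stem are placeholders. Note that fmap carries the filename without the .mrc extension, which the function appends:

fmap = ('1-EXP', '1-BIO', '1-REP', 'sample_01_pre_rec')  # (experiment, biosample, replicate, stem)
add_file_to_replicant('1-DSET', fmap, description='X-ray tomography reconstruction')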
Example #5
def fetch_studies(fileid):
    """
    Get the set of files associated with a cohort analysis.
    :param fileid: RID of the saved analysis data, optionally suffixed with @snaptime to pin a catalog snapshot.
    :return: tuple (studyid, slist), where slist contains the unpickled studies.
    """

    credential = get_credential(synapseserver)

    if '@' in fileid:
        [fileid, snaptime] = fileid.split('@')
        catalog = ErmrestSnapshot('https', synapseserver, 1, snaptime, credentials=credential)
    else:
        catalog = ErmrestCatalog('https', synapseserver, 1, credentials=credential)
        catalog = catalog.latest_snapshot()
        snaptime = catalog.snaptime
    hatrac = HatracStore('https', synapseserver, credentials=credential)

    pb = catalog.getPathBuilder()
    zebrafish = pb.Zebrafish
    synapse = pb.Synapse

    # Let's get some shortcuts for awkward table names.
    cohort_table = zebrafish.tables['Cohort Analysis']
    collection_table = zebrafish.tables['Cohort Analysis_Collection']
    collection = synapse.tables['Collection']

    # Now get the studyid associated with this file....

    studyid = collection.filter(collection.RID == fileid).link(collection_table).entities()[0]['Cohort Analysis']

    path = cohort_table.alias('studyset').link(zebrafish.tables['Cohort Analysis_Collection']).link(collection)
    path = path.filter(path.studyset.RID == studyid)

    fileentity = collection.filter(collection.RID == fileid).entities()[0]
    file = fileentity['URL']
    print('File description: {}'.format(fileentity['Description']))

    try:
        # Get a path for a temporary file to store results
        tmpfile = os.path.join(tempfile.mkdtemp(), 'pairs-dump.pkl')
        hatrac.get_obj(file, destfilename=tmpfile)
        with open(tmpfile, 'rb') as fo:
            slist = pickle.load(fo)
    finally:
        shutil.rmtree(os.path.dirname(tmpfile))

    print('Restored {0} studies from {1}'.format(len(slist['Studies']), studyid))
    return studyid, slist
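Two hypothetical invocations of fetch_studies, one against the latest catalog snapshot and one pinned via the RID@snaptime form; the RID and snaptime values are placeholders:

studyid, slist = fetch_studies('1-4QNP')            # latest snapshot
studyid, slist = fetch_studies('1-4QNP@2PX-SGAY')   # explicit snapshot
print('{} studies'.format(len(slist['Studies'])))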
Example #7
 def __init__(self, **kwargs):
     self.metadata = kwargs.get("metadata")
     self.baseuri = kwargs.get("baseuri")
     o = urlparse.urlparse(self.baseuri)
     self.scheme = o[0]
     host_port = o[1].split(":")
     self.host = host_port[0]
     self.path = o.path
     self.port = None
     if len(host_port) > 1:
         self.port = host_port[1]
     self.dzi = kwargs.get("dzi")
     self.thumbnails = kwargs.get("thumbnails")
     self.czi2dzi = kwargs.get("czi2dzi")
     self.viewer = kwargs.get("viewer")
     self.czirules = kwargs.get("czirules")
     self.showinf = kwargs.get("showinf")
     self.data_scratch = kwargs.get("data_scratch")
     self.cookie = kwargs.get("cookie")
     self.store = HatracStore(
         self.scheme, 
         self.host,
         {'cookie': self.cookie}
     )
     self.catalog = PollingErmrestCatalog(
         self.scheme, 
         self.host,
         self.path.split('/')[-1],
         {'cookie': self.cookie}
     )
     self.mail_server = kwargs.get("mail_server")
     self.mail_sender = kwargs.get("mail_sender")
     self.mail_receiver = kwargs.get("mail_receiver")
     self.logger = kwargs.get("logger")
     self.logger.debug('Client initialized.')
Example #8
def main(gu, species, source_url, hatrac_parent, skip_hatrac):
    hatrac_server = HatracStore('https', gu.host, gu.credential)

    # fetch source file and write it to scratch
    src = request.urlopen(source_url)
    filename = re.sub('.*/', '', source_url)
    parent = Path(gu.scratch_directory) / 'raw'
    parent.mkdir(parents=True, exist_ok=True)
    scratchpath = parent / filename
    scratchfile = scratchpath.open(mode='wb')

    while True:
        buf = src.read(102400)
        if len(buf) < 1:
            break
        scratchfile.write(buf)

    # get species id from name
    raw_species_table = gu.model.table(gu.species_schema, gu.species_table)
    name_col = gu.find_column_case_match(raw_species_table, 'Name')
    id_col = gu.find_column_case_match(raw_species_table, 'ID')
    species_table = gu.pb.schemas[gu.species_schema].tables[gu.species_table]
    rows = species_table.filter(
        species_table.column_definitions[name_col] == species).entities()
    species_id = rows[0][id_col]
    
    # upload and add record to catalog
    if not skip_hatrac:
        desturl = hatrac_parent + '/' + parse.quote(species_id) + '/' + parse.quote(filename)
        print(desturl)
        url = hatrac_server.put_obj(desturl, scratchpath, parents=True)
        table = gu.pb.schemas.get(gu.source_file_schema).tables[gu.source_file_table]
        record = {
            'Species' : species_id,
            'Downloaded_From' : source_url,
            'File_Name': filename,
            'File_URL' : url,
            'File_Bytes' : scratchpath.stat().st_size,
            'File_MD5' : hash_utils.compute_file_hashes(str(scratchpath), hashes=['md5'])['md5'][0]
        }
        
        try:
            table.insert([record])
        except DataPathException:
            table.update([record], ['Species'])

    # output the name of the scratch file that was created
    print(str(scratchpath))
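The download loop above is a standard chunked-copy pattern; here it is factored into a standalone helper for clarity (the function name and defaults are ours, not part of the original code):

from pathlib import Path
from urllib import request

def fetch_to_scratch(source_url: str, dest: Path, chunk_size: int = 102400) -> Path:
    """Stream source_url to dest in fixed-size chunks."""
    dest.parent.mkdir(parents=True, exist_ok=True)
    with request.urlopen(source_url) as src, dest.open('wb') as out:
        while True:
            buf = src.read(chunk_size)
            if not buf:
                break
            out.write(buf)
    return dest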
Example #9
    def __init__(self, server,
                 output_dir=None, kwargs=None, config=None, config_file=None, credentials=None, credential_file=None):
        self.server = server
        self.hostname = None
        self.output_dir = output_dir if output_dir else "."
        self.envars = kwargs if kwargs else dict()
        self.catalog = None
        self.store = None
        self.config = config
        self.cancelled = False
        self.credentials = credentials if credentials else dict()
        self.metadata = dict()
        self.sessions = dict()

        info = "%s v%s [Python %s, %s]" % (
            self.__class__.__name__, VERSION, platform.python_version(), platform.platform(aliased=True))
        logging.info("Initializing downloader: %s" % info)

        if not self.server:
            raise RuntimeError("Server not specified!")

        # server variable initialization
        self.hostname = self.server.get('host', '')
        if not self.hostname:
            raise RuntimeError("Host not specified!")
        protocol = self.server.get('protocol', 'https')
        self.server_url = protocol + "://" + self.hostname
        catalog_id = self.server.get("catalog_id", "1")
        session_config = self.server.get('session')

        # credential initialization
        if credential_file:
            self.credentials = get_credential(self.hostname, credential_file)

        # catalog and file store initialization
        if self.catalog:
            del self.catalog
        self.catalog = ErmrestCatalog(
            protocol, self.hostname, catalog_id, self.credentials, session_config=session_config)
        if self.store:
            del self.store
        self.store = HatracStore(
            protocol, self.hostname, self.credentials, session_config=session_config)

        # process config file
        if config_file and os.path.isfile(config_file):
            self.config = read_config(config_file)
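A minimal construction sketch for the downloader above. The enclosing class name is not visible in this excerpt, so Downloader is a stand-in, and the host and credential file are placeholders:

server = {'protocol': 'https', 'host': 'example.derivacloud.org', 'catalog_id': '1'}
downloader = Downloader(
    server,
    output_dir='/tmp/export',
    credential_file='~/.deriva/credential.json',  # handed to get_credential()
)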
Example #10
 def __init__(self, **kwargs):
     self.baseuri = kwargs.get("baseuri")
     o = urlparse.urlparse(self.baseuri)
     self.scheme = o[0]
     host_port = o[1].split(":")
     self.host = host_port[0]
     self.path = o.path
     self.port = None
     if len(host_port) > 1:
         self.port = host_port[1]
     self.cookie = kwargs.get("cookie")
     self.store = HatracStore(self.scheme, self.host,
                              {'cookie': self.cookie})
     self.catalog = PollingErmrestCatalog(self.scheme, self.host,
                                          self.path.split('/')[-1],
                                          {'cookie': self.cookie})
     self.mail_server = kwargs.get("mail_server")
     self.mail_sender = kwargs.get("mail_sender")
     self.mail_receiver = kwargs.get("mail_receiver")
     self.logger = kwargs.get("logger")
     self.logger.debug('Hatrac Client initialized.')
Example #11
def get_synapses(study):
    """
    Get the synapse data associated with a study.  We will retrieve the actual data from the object store, and we will
    get both the before and after data if it is availabe.  CSV version of the data will be read in and stored as a PANDA

     study: a dictionary that has URLs for the two images, before and after
     returns two pandas that have the synapses in them.
     """
    credential = get_credential(synapseserver)
    objectstore = HatracStore('https', synapseserver, credentials=credential)

    # Get a path for a temporary file to store HATRAC results
    path = os.path.join(tempfile.mkdtemp(), 'image')
    try:
        # Get the before image from hatrac, be careful in case its missing
        if study['BeforeURL']:
            objectstore.get_obj(study['BeforeURL'], destfilename=path)
            img1 = pd.read_csv(path)
            # Drop the first data row, which holds saved parameters rather than a synapse.
            img1.drop(img1.index[0], inplace=True)
        else:
            img1 = None

        # Get the after image from hatrac, be careful in case its missing
        if study['AfterURL']:
            objectstore.get_obj(study['AfterURL'], destfilename=path)
            img2 = pd.read_csv(path)
            # Drop the first data row (saved parameters), as above.
            img2.drop(img2.index[0], inplace=True)
        else:
            img2 = None
    finally:
        shutil.rmtree(os.path.dirname(path))
    return {'Before': img1, 'After': img2, 'Type': study['Type'], 'Study': study['Study'], 'Subject': study['Subject']}
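A hypothetical study record for get_synapses; either URL may be None, in which case the corresponding entry in the result is None:

study = {
    'BeforeURL': '/hatrac/Zf/Zf_1-AB12/ImagePair_1-CD34_s1.csv',  # illustrative
    'AfterURL': None,
    'Type': 'paired', 'Study': '1-CD34', 'Subject': '1-AB12',
}
result = get_synapses(study)
before = result['Before']  # pandas DataFrame, or None if the URL was missing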
Example #12
def main():
    description = 'DERIVA Command line tool'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--server', help="Hatrac server")

    subparsers = parser.add_subparsers()

    # create the parser for the "list" command
    parser_list = subparsers.add_parser('list', aliases=['ls'])
    parser_list.add_argument('path', nargs='?', default='')
    parser_list.set_defaults(func=hatrac_list)

    # create the parser for the "dir" command
    parser_namespace = subparsers.add_parser('mkdir')
    parser_namespace.add_argument('path')
    parser_namespace.add_argument('-p', default=True)
    parser_namespace.set_defaults(func=hatrac_namespace)

    # copy  file to local directory
    parser_copy = subparsers.add_parser('copy')
    parser_copy.add_argument('path1')
    parser_copy.add_argument('path2')
    parser_copy.set_defaults(func=hatrac_copy)

    # parse the args and call whatever function was selected
    args = parser.parse_args()

    urlparts = urlsplit(args.path, scheme='http')
    host = args.server if args.server else urlparts.netloc
    if not host:
        print('Hatrac server name required')
        return

    if args.server:
        args.path = args.path.replace('/hatrac', '')
        if not args.path.startswith('/'):
            args.path = '/' + args.path
        args.path = '/hatrac' + args.path
    elif args.path == '/hatrac':      # Missing trailing slash
        args.path = '/hatrac/'

    credential = get_credential(host)
    args.catalog = HatracStore(urlparts.scheme, host, credentials=credential)

    args.func(args)
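The command-line surface above can be exercised without a shell by patching sys.argv before calling main(); the server name and path are placeholders:

import sys

sys.argv = ['deriva-hatrac-cli', '--server', 'hatrac.example.org', 'list', '/']
main()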
Example #13
 def __init__(self, **kwargs):
     self.baseuri = kwargs.get("baseuri")
     o = urlparse.urlparse(self.baseuri)
     self.scheme = o[0]
     host_port = o[1].split(":")
     self.host = host_port[0]
     self.path = o.path
     self.port = None
     if len(host_port) > 1:
         self.port = host_port[1]
     self.cookie = kwargs.get("cookie")
     self.client_secrets_file = kwargs.get("client_secrets_file")
     self.client_oauth2_file = kwargs.get("client_oauth2_file")
     self.data_scratch = kwargs.get("data_scratch")
     self.ffmpeg = kwargs.get("ffmpeg")
     self.ffprobe = kwargs.get("ffprobe")
     self.category = kwargs.get("category")
     self.keywords = kwargs.get("keywords")
     self.privacyStatus = kwargs.get("privacyStatus")
     self.text_position = kwargs.get("text_position")
     self.store = HatracStore(self.scheme, self.host,
                              {'cookie': self.cookie})
     self.catalog = PollingErmrestCatalog(self.scheme, self.host,
                                          self.path.split('/')[-1],
                                          {'cookie': self.cookie})
     self.mail_server = kwargs.get("mail_server")
     self.mail_sender = kwargs.get("mail_sender")
     self.mail_receiver = kwargs.get("mail_receiver")
     self.logger = kwargs.get("logger")
     argparser.add_argument("--config",
                            required=True,
                            help="YouTube configuration file")
     self.args = argparser.parse_args()
     self.args.category = self.category
     self.args.keywords = self.keywords
     self.args.privacyStatus = self.privacyStatus
     self.args.noauth_local_webserver = True
     self.logger.debug('Upload YouTube Client initialized.')
Example #14
class HatracClient(object):
    """
    Network client for hatrac.
    """

    ## Derived from the ermrest iobox service client

    def __init__(self, **kwargs):
        self.baseuri = kwargs.get("baseuri")
        o = urlparse.urlparse(self.baseuri)
        self.scheme = o[0]
        host_port = o[1].split(":")
        self.host = host_port[0]
        self.path = o.path
        self.port = None
        if len(host_port) > 1:
            self.port = host_port[1]
        self.cookie = kwargs.get("cookie")
        self.store = HatracStore(self.scheme, self.host,
                                 {'cookie': self.cookie})
        self.catalog = PollingErmrestCatalog(self.scheme, self.host,
                                             self.path.split('/')[-1],
                                             {'cookie': self.cookie})
        self.mail_server = kwargs.get("mail_server")
        self.mail_sender = kwargs.get("mail_sender")
        self.mail_receiver = kwargs.get("mail_receiver")
        self.logger = kwargs.get("logger")
        self.logger.debug('Hatrac Client initialized.')

    """
    Send email notification
    """

    def sendMail(self, subject, text):
        if self.mail_server and self.mail_sender and self.mail_receiver:
            retry = 0
            ready = False
            while not ready:
                try:
                    msg = MIMEText('%s\n\n%s' % (text, mail_footer), 'plain')
                    msg['Subject'] = subject
                    msg['From'] = self.mail_sender
                    msg['To'] = self.mail_receiver
                    s = smtplib.SMTP(self.mail_server)
                    s.sendmail(self.mail_sender, self.mail_receiver.split(','),
                               msg.as_string())
                    s.quit()
                    self.logger.debug('Sent email notification.')
                    ready = True
                except socket.gaierror as e:
                    if e.errno == socket.EAI_AGAIN:
                        time.sleep(100)
                        retry = retry + 1
                        ready = retry > 10
                    else:
                        ready = True
                    if ready:
                        et, ev, tb = sys.exc_info()
                        self.logger.error('got exception "%s"' % str(ev))
                        self.logger.error(
                            '%s' % str(traceback.format_exception(et, ev, tb)))
                except:
                    et, ev, tb = sys.exc_info()
                    self.logger.error('got exception "%s"' % str(ev))
                    self.logger.error(
                        '%s' % str(traceback.format_exception(et, ev, tb)))
                    ready = True

    """
    Start the process for deleting files from hatrac
    """

    def start(self):
        try:
            self.deleteFromHatrac()
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' %
                              str(traceback.format_exception(et, ev, tb)))
            self.sendMail(
                'FAILURE Delete Hatrac: unexpected exception',
                '%s\nThe process might have been stopped\n' %
                str(traceback.format_exception(et, ev, tb)))
            raise

    """
    Delete videos from hatrac
    """

    def deleteFromHatrac(self):
        url = '/entity/Common:Delete_Hatrac/Hatrac_Deleted=FALSE/Processing_Status=in%20progress;Processing_Status::null::'
        resp = self.catalog.get(url)
        resp.raise_for_status()
        files = resp.json()
        fileids = []
        for f in files:
            fileids.append((f['Hatrac_URI'], f['RID']))

        self.logger.debug('Deleting from hatrac %d file(s).' % (len(fileids)))
        for hatrac_uri, rid in fileids:
            try:
                self.store.del_obj(hatrac_uri)
                self.logger.debug(
                    'SUCCEEDED: deleted the "%s" file from hatrac.' %
                    (hatrac_uri))
                columns = ["Hatrac_Deleted", "Processing_Status"]
                columns = ','.join([urlquote(col) for col in columns])
                url = '/attributegroup/Common:Delete_Hatrac/RID;%s' % (columns)
                obj = {
                    'RID': rid,
                    'Hatrac_Deleted': True,
                    'Processing_Status': 'success'
                }
                self.catalog.put(url, json=[obj])
                self.logger.debug(
                    'SUCCEEDED: updated the Common:Delete_Hatrac table entry for the Hatrac URL "%s".'
                    % (hatrac_uri))
            except Exception as e:
                et, ev, tb = sys.exc_info()
                self.logger.error('got exception "%s"' % str(ev))
                self.logger.error('%s' %
                                  str(traceback.format_exception(et, ev, tb)))
                self.reportFailure(rid, str(e))

    """
    Update the Delete_Hatrac table with the ERROR status
    """

    def reportFailure(self, rid, error_message):
        """
            Update the Delete_Hatrac table with the failure result.
        """
        try:
            columns = ["Processing_Status"]
            columns = ','.join([urlquote(col) for col in columns])
            url = '/attributegroup/Common:Delete_Hatrac/RID;%s' % (columns)
            obj = {'RID': rid, 'Processing_Status': '%s' % error_message}
            self.catalog.put(url, json=[obj])
            self.logger.debug(
                'SUCCEEDED: updated the Delete_Hatrac table for RID "%s" with Processing_Status "%s".'
                % (rid, error_message))
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got exception "%s"' % str(ev))
            self.logger.error('%s' %
                              str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Delete Hatrac: reportFailure ERROR',
                          '%s\n' % str(traceback.format_exception(et, ev, tb)))
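A construction sketch for HatracClient; the base URI, cookie, and mail settings are placeholders. start() drives the deletion loop shown above and emails on failure if the mail settings are provided:

import logging

logging.basicConfig(level=logging.DEBUG)
client = HatracClient(
    baseuri='https://example.org/ermrest/catalog/1',  # scheme://host/.../catalog_id
    cookie='webauthn=...',                            # placeholder session cookie
    mail_server='localhost',
    mail_sender='noreply@example.org',
    mail_receiver='ops@example.org',
    logger=logging.getLogger('delete-hatrac'),
)
client.start()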
Example #15
class Worker(object):
    # server to talk to... defaults to our own FQDN
    servername = os.getenv('SYNSPY_SERVER', platform.uname()[1])

    # secret session cookie
    credentials = get_credential(servername,
                                 credential_file=os.getenv(
                                     'SYNSPY_CREDENTIALS',
                                     DEFAULT_CREDENTIAL_FILE))

    poll_seconds = int(os.getenv('SYNSPY_POLL_SECONDS', '600'))

    scriptdir = os.getenv('SYNSPY_PATH')
    scriptdir = '%s/' % scriptdir if scriptdir else ''

    # remember where we started
    startup_working_dir = os.getcwd()

    tmpdir = os.getenv('TMPDIR', '/var/tmp')

    # track per-instance working dirs
    working_dirs = dict()

    # these are persistent/logical connections, so we create them once and reuse them;
    # they can retain state and manage an actual HTTP connection pool
    catalog = PollingErmrestCatalog('https', servername, '1', credentials)

    store = HatracStore('https', servername, credentials)

    # for state-tracking across look_for_work() iterations
    idle_etag = None

    def __init__(self, row, unit):
        sys.stderr.write('Claimed job %s.\n' % row.get('RID'))

        self.row = row
        self.unit = unit
        self.subject_path = '/hatrac/Zf/Zf_%s' % row['Subject']

        self.working_dir = None
        # we want a temporary work space for our working files
        self.working_dir = tempfile.mkdtemp(dir=self.tmpdir)
        self.working_dirs[self.working_dir] = self.working_dir
        os.chdir(self.working_dir)
        sys.stderr.write('Using working directory %s.\n' % self.working_dir)

    @staticmethod
    def cleanup_working_dir(dirname):
        sys.stderr.write('Purging working directory %s... ' % dirname)
        shutil.rmtree(dirname)
        sys.stderr.write('done.\n')

    def cleanup(self):
        sys.stderr.write('\n')
        os.chdir(self.startup_working_dir)
        if self.working_dir:
            self.cleanup_working_dir(self.working_dir)
            del self.working_dirs[self.working_dir]
            self.working_dir = None

    def get_file(self, url):
        """Download file from URL returning local file name"""
        # short-cut, read file directly out of local hatrac
        filename = '/var/www' + url
        if os.path.isfile(filename):
            return filename
        else:
            # but fall back to HTTPS for remote workers...
            m = re.match('^(?P<basename>[^:]+)(?P<v>[:][0-9A-Z]+)?$',
                         os.path.basename(url))
            filename = m.groupdict()['basename']
            self.store.get_obj(url, destfilename=filename)
            return filename

    def get_image_info(self, img_filename):
        """Extract image resolution and shape."""
        try:
            I, md = load_image(str(img_filename))
        except Exception as e:
            raise WorkerBadDataError(
                'Image %s could not be loaded... is it the wrong format? %s' %
                (img_filename, e))
        if not hasattr(I, 'micron_spacing'):
            raise WorkerBadDataError(
                'Image %s lacks expected micron_spacing attribute.' %
                img_filename)
        return I.micron_spacing, I.shape

    def preprocess_roi(self, img_filename, zyx_slice, omit_voxels=False):
        """Analyze ROI and upload resulting NPZ file, returning NPZ URL."""
        command = [self.scriptdir + 'synspy-analyze', img_filename]
        env = {
            'ZYX_SLICE':
            zyx_slice,
            'ZYX_IMAGE_GRID':
            '0.4,0.26,0.26',
            'SYNSPY_DETECT_NUCLEI':
            str(self.row['Segmentation Mode'].lower() == 'nucleic'),
            'DUMP_PREFIX':
            './ROI_%s' % self.row['RID'],
            'OMIT_VOXELS':
            str(omit_voxels).lower(),
        }
        sys.stderr.write('Using analysis environment %r\n' % (env, ))
        analysis = subprocess.Popen(command, stdin=fnull, env=env)
        code = analysis.wait()
        del analysis
        if code != 0:
            raise WorkerRuntimeError('Non-zero analysis exit status %s!' %
                                     code)

        return self.store.put_loc(
            '%s/ROI_%s.npz' % (self.subject_path, self.row['RID']),
            'ROI_%s.npz' % self.row['RID'],
            headers={'Content-Type': 'application/octet-stream'})

    def filter_synspy_csv(self, csv_url):
        """Process input CSV URL and upload filtered CSV, returning CSV URL."""
        # this should really be dead code in practice... current launcher uploads filtered csv directly
        m = re.match('^(?P<basename>.+)[.]csv(?P<v>[:][0-9A-Z]+)?$',
                     os.path.basename(csv_url))
        base = m.groupdict()['basename']
        csv_filename = '%s.csv' % base

        # download the content to temp dir
        self.store.get_obj(csv_url, destfilename=csv_filename)

        # prepare to read CSV content from temp dir
        csv_file = open(csv_filename, 'r')
        reader = csv.DictReader(csv_file)

        # prepare to write filtered CSV to temp dir
        filtered_filename = '%s_only.csv' % base
        filtered_file = open(filtered_filename, 'w', newline='')
        writer = csv.writer(filtered_file)

        # write header
        writer.writerow(('Z', 'Y', 'X', 'raw core', 'raw hollow', 'DoG core',
                         'DoG hollow') +
                        (('red', ) if 'red' in reader.fieldnames else ()) +
                        ('override', ))

        # copy w/ filtering
        for row in reader:
            if row['Z'] == 'saved' and row['Y'] == 'parameters' \
               or row['override'] and int(row['override']) == 7:
                writer.writerow(
                    (row['Z'], row['Y'], row['X'], row['raw core'],
                     row['raw hollow'], row['DoG core'], row['DoG hollow']) +
                    ((row['red'], ) if 'red' in reader.fieldnames else ()) +
                    (row['override'], ))

        del reader
        csv_file.close()
        del writer
        filtered_file.close()

        return self.store.put_loc('%s/%s' %
                                  (self.subject_path, filtered_filename),
                                  filtered_filename,
                                  headers={'Content-Type': 'text/csv'})

    def compute_synspy_stats(self, csv_url, existing_row={}):
        """Process input CSV URL and return stats column value updates."""
        filename = self.get_file(csv_url)
        c, m, s, p = util.load_segment_info_from_csv(filename,
                                                     (0.4, 0.26, 0.26),
                                                     filter_status=(3, 7))
        if c.shape[0] > 0:
            stats = {
                'Core Min.': float(m[:, 0].min()),
                'Core Max.': float(m[:, 0].max()),
                'Core Sum': float(m[:, 0].sum()),
                '#Centroids': int(m.shape[0]),
                'Core Mean': float(m[:, 0].mean()),
            }
        else:
            stats = {
                'Core Min.': None,
                'Core Max.': None,
                'Core Sum': None,
                '#Centroids': 0,
                'Core Mean': None,
            }
        return {
            k: v
            for k, v in stats.items()
            if k not in existing_row or existing_row[k] != v
        }

    def register_nuclei(self,
                        n1_url,
                        n2_url,
                        zyx_scale=(0.4, 0.26, 0.26),
                        filter_status=(3, 7)):
        """Register nuclei files returning alignment matrix and processed and uploaded pointcloud URLs.

           Returns:
             M, n1_url, n2_url
        """
        n1_filename = self.get_file(n1_url)
        n2_filename = self.get_file(n2_url)
        nuc1cmsp = util.load_segment_info_from_csv(n1_filename,
                                                   zyx_scale,
                                                   filter_status=filter_status)
        nuc2cmsp = util.load_segment_info_from_csv(n2_filename,
                                                   zyx_scale,
                                                   filter_status=filter_status)
        M, angles = register.align_centroids(nuc1cmsp[0], nuc2cmsp[0])
        nuc2cmsp = (register.transform_centroids(M,
                                                 nuc2cmsp[0]), ) + nuc2cmsp[1:]
        n1_outfile = 'ImagePair_%s_n1_registered.csv' % self.row['RID']
        n2_outfile = 'ImagePair_%s_n2_registered.csv' % self.row['RID']
        register.dump_registered_file_pair((n1_outfile, n2_outfile),
                                           (nuc1cmsp, nuc2cmsp))
        n1_url = self.store.put_loc('%s/%s' % (self.subject_path, n1_outfile),
                                    n1_outfile,
                                    headers={'Content-Type': 'text/csv'})
        n2_url = self.store.put_loc('%s/%s' % (self.subject_path, n2_outfile),
                                    n2_outfile,
                                    headers={'Content-Type': 'text/csv'})
        return M, n1_url, n2_url

    def matrix_to_prejson(self, M):
        return [[float(M[i, j]) for j in range(4)] for i in range(4)]

    def register_synapses(self,
                          s1_url,
                          s2_url,
                          zyx_scale=(0.4, 0.26, 0.26),
                          filter_status=(3, 7)):
        """Register synaptic files using image pair alignment, returning URLs of processed and uploaded pointcloud URLs.

           Returns:
             s1_url, s2_url
        """
        s1_filename = self.get_file(s1_url)
        s2_filename = self.get_file(s2_url)
        syn1cmsp = util.load_segment_info_from_csv(s1_filename,
                                                   zyx_scale,
                                                   filter_status=filter_status)
        syn2cmsp = util.load_segment_info_from_csv(s2_filename,
                                                   zyx_scale,
                                                   filter_status=filter_status)
        M = np.array(self.row['Alignment'], dtype=np.float64)
        syn2cmsp = (register.transform_centroids(M,
                                                 syn2cmsp[0]), ) + syn2cmsp[1:]
        s1_outfile = 'SynapticPair_%s_s1_registered.csv' % self.row.get('RID')
        s2_outfile = 'SynapticPair_%s_s2_registered.csv' % self.row.get('RID')
        register.dump_registered_file_pair((s1_outfile, s2_outfile),
                                           (syn1cmsp, syn2cmsp))
        s1_url = self.store.put_loc('%s/%s' % (self.subject_path, s1_outfile),
                                    s1_outfile,
                                    headers={'Content-Type': 'text/csv'})
        s2_url = self.store.put_loc('%s/%s' % (self.subject_path, s2_outfile),
                                    s2_outfile,
                                    headers={'Content-Type': 'text/csv'})
        return s1_url, s2_url

    def put_row_update(self, update_row):
        self.catalog.put('%s;%s' % (self.unit.put_update_baseurl, ','.join([
            urlquote(col, safe='')
            for col in list(update_row.keys()) if col not in ['ID', 'RID']
        ])),
                         json=[update_row])
        sys.stderr.write('\nupdated in ERMrest: %s' %
                         json.dumps(update_row, indent=2))

    work_units = _work_units  # these are defined above w/ their funcs and URLs...

    @classmethod
    def look_for_work(cls):
        """Find, claim, and process work for each work unit.

        Do find/claim with HTTP opportunistic concurrency control and
        caching for efficient polling and quiescence.

        On error, set Status="failed: reason"

        Result:
         true: there might be more work to claim
         false: we failed to find any work
        """
        found_work = False

        for unit in cls.work_units:
            # this handles concurrent updates for us so we can safely and efficiently claim a record
            unit.idle_etag, batch = cls.catalog.state_change_once(
                unit.get_claimable_url, unit.put_claim_url,
                unit.claim_input_data, unit.idle_etag)
            # batch may be empty if no work was found...
            for row, claim in batch:
                found_work = True
                handler = None
                try:
                    handler = cls(row, unit)
                    unit.run_row_job(handler)
                except WorkerBadDataError as e:
                    sys.stderr.write("Aborting task %s on data error: %s\n" %
                                     (row["RID"], e))
                    cls.catalog.put(unit.put_claim_url,
                                    json=[unit.failure_input_data(row, e)])
                    # continue with next task...?
                except Exception as e:
                    # TODO: eat some exceptions and return True to continue?
                    if unit.failure_input_data is not None:
                        cls.catalog.put(unit.put_claim_url,
                                        json=[unit.failure_input_data(row, e)])
                    raise
                finally:
                    if handler is not None:
                        handler.cleanup()

        return found_work

    @classmethod
    def blocking_poll(cls):
        return cls.catalog.blocking_poll(cls.look_for_work,
                                         polling_seconds=cls.poll_seconds)
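A plausible entry point for this worker (the actual main is not part of the excerpt): blocking_poll() repeatedly invokes look_for_work() over the shared catalog connection:

if __name__ == '__main__':
    Worker.blocking_poll()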
Example #16
class MainWindow(QMainWindow):
    config = None
    credential = None
    config_path = None
    store = None
    catalog = None
    identity = None
    attributes = None
    server = None
    tempdir = None
    progress_update_signal = pyqtSignal(str)
    use_3D_viewer = False
    curator_mode = False

    def __init__(self, config_path=None):
        super(MainWindow, self).__init__()
        self.ui = MainWindowUI(self)
        self.configure(config_path)
        self.authWindow = EmbeddedAuthWindow(
            self,
            config=self.config.get("server"),
            cookie_persistence=False,
            authentication_success_callback=self.onLoginSuccess)
        self.getSession()
        if not self.identity:
            self.ui.actionLaunch.setEnabled(False)
            self.ui.actionRefresh.setEnabled(False)
            self.ui.actionOptions.setEnabled(False)
            self.ui.actionLogout.setEnabled(False)

    def configure(self, config_path):
        # configure logging
        self.ui.logTextBrowser.widget.log_update_signal.connect(self.updateLog)
        self.ui.logTextBrowser.setFormatter(
            logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
        logging.getLogger().addHandler(self.ui.logTextBrowser)
        logging.getLogger().setLevel(logging.INFO)

        # configure Ermrest/Hatrac
        if not config_path:
            config_path = os.path.join(
                os.path.expanduser(
                    os.path.normpath("~/.deriva/synapse/synspy-launcher")),
                "config.json")
        self.config_path = config_path
        config = read_config(self.config_path,
                             create_default=True,
                             default=DEFAULT_CONFIG)
        protocol = config["server"]["protocol"]
        self.server = config["server"]["host"]
        catalog_id = config["server"]["catalog_id"]
        session_config = config.get("session")
        self.catalog = ErmrestCatalog(protocol,
                                      self.server,
                                      catalog_id,
                                      self.credential,
                                      session_config=session_config)
        self.store = HatracStore(protocol,
                                 self.server,
                                 self.credential,
                                 session_config=session_config)

        # create working dir (tempdir)
        self.tempdir = tempfile.mkdtemp(prefix="synspy_")

        # determine viewer mode
        self.use_3D_viewer = config.get("viewer_mode", "2d").lower() == "3d"

        # curator mode?
        curator_mode = config.get("curator_mode")
        if not curator_mode:
            config["curator_mode"] = False
        self.curator_mode = config.get("curator_mode")

        # save config
        self.config = config
        write_config(self.config_path, self.config)

    def getSession(self):
        qApp.setOverrideCursor(Qt.WaitCursor)
        self.updateStatus("Validating session.")
        queryTask = SessionQueryTask(self.catalog)
        queryTask.status_update_signal.connect(self.onSessionResult)
        queryTask.query()

    def onLoginSuccess(self, **kwargs):
        self.authWindow.hide()
        self.credential = kwargs["credential"]
        self.catalog.set_credentials(self.credential, self.server)
        self.store.set_credentials(self.credential, self.server)
        self.getSession()

    def enableControls(self):
        self.ui.actionLaunch.setEnabled(True)
        self.ui.actionRefresh.setEnabled(True)
        self.ui.actionOptions.setEnabled(self.authWindow.authenticated())
        self.ui.actionLogin.setEnabled(not self.authWindow.authenticated())
        self.ui.actionLogout.setEnabled(self.authWindow.authenticated())
        self.ui.actionExit.setEnabled(True)
        self.ui.workList.setEnabled(True)

    def disableControls(self):
        self.ui.actionLaunch.setEnabled(False)
        self.ui.actionRefresh.setEnabled(False)
        self.ui.actionOptions.setEnabled(False)
        self.ui.actionLogin.setEnabled(False)
        self.ui.actionLogout.setEnabled(False)
        self.ui.actionExit.setEnabled(False)
        self.ui.workList.setEnabled(False)

    def closeEvent(self, event=None):
        self.disableControls()
        self.cancelTasks()
        shutil.rmtree(self.tempdir)
        if event:
            event.accept()

    def cancelTasks(self):
        Request.shutdown()
        self.statusBar().showMessage(
            "Waiting for background tasks to terminate...")

        while True:
            qApp.processEvents()
            if QThreadPool.globalInstance().waitForDone(10):
                break

        self.statusBar().showMessage(
            "All background tasks terminated successfully")

    def is_curator(self):
        for attr in self.attributes:
            if attr.get('id') == CURATORS:
                return True
        return False

    def displayWorklist(self, worklist):
        keys = [
            "RID",
            "RCT",
            "Source Image",
            "Classifier",
            "Due Date",
            "Accepted?",
            "Status",
            "URL",
            "Npz URL",
            "ZYX Slice",
            "Segmentation Mode",
            "Segments URL",
            "Segments Filtered URL",
            "Subject",
        ]
        self.ui.workList.clear()
        self.ui.workList.setRowCount(0)
        self.ui.workList.setColumnCount(0)
        displayed = [
            "RID", "RCT", "Segmentation Mode", "Classifier", "Due Date",
            "Accepted?", "Status"
        ]
        self.ui.workList.setRowCount(len(worklist))
        self.ui.workList.setColumnCount(len(keys))

        self.ui.workList.removeAction(self.ui.markIncompleteAction)
        if self.is_curator() and self.curator_mode:
            self.ui.workList.addAction(self.ui.markIncompleteAction)

        rows = 0
        for row in worklist:
            value = row.get("Status")
            if not (value == "analysis pending" or value == "analysis in progress") \
                    and not (self.is_curator() and self.curator_mode):
                self.ui.workList.hideRow(rows)
            cols = 0
            for key in keys:
                item = QTableWidgetItem()
                if key == "Classifier":
                    value = "%s (%s)" % (row['user'][0]['Full_Name'],
                                         row['user'][0]['Display_Name'])
                    item.setData(Qt.UserRole, row['Classifier'])
                elif key == "URL" or key == "Subject":
                    value = row["source_image"][0].get(key)
                else:
                    value = row.get(key)
                if isinstance(value, bool):
                    value = str(value)
                if isinstance(value, str) and key == 'RCT':
                    value = value.replace(
                        'T', ' ')[0:19]  # drop fractional seconds and TZ
                if isinstance(value, str):
                    item.setText(value)
                    item.setToolTip(value)
                self.ui.workList.setItem(rows, cols, item)
                cols += 1
            rows += 1

        cols = 0
        for key in keys:
            if key not in displayed:
                self.ui.workList.hideColumn(cols)
            cols += 1

        self.ui.workList.setHorizontalHeaderLabels(keys)  # add header names
        self.ui.workList.horizontalHeader().setDefaultAlignment(
            Qt.AlignLeft)  # set alignment
        for col in range(len(displayed)):
            self.ui.workList.resizeColumnToContents(col)
        self.ui.workList.sortByColumn(2, Qt.DescendingOrder)

    def getCacheDir(self):
        cwd = os.getcwd()
        cache_dir = os.path.expanduser(self.config.get("cache_dir", cwd))
        if not os.path.isdir(cache_dir):
            try:
                os.makedirs(cache_dir)
            except OSError as error:
                if error.errno != errno.EEXIST:
                    logging.error(format_exception(error))
                    cache_dir = cwd
        return cache_dir

    def downloadCallback(self, **kwargs):
        status = kwargs.get("progress")
        if status:
            self.progress_update_signal.emit(status)
        return True

    def uploadCallback(self, **kwargs):
        completed = kwargs.get("completed")
        total = kwargs.get("total")
        file_path = kwargs.get("file_path")
        if completed and total:
            file_path = " [%s]" % os.path.basename(
                file_path) if file_path else ""
            status = "Uploading file%s: %d%% complete" % (
                file_path, round(((completed / total) % 100) * 100))
        else:
            summary = kwargs.get("summary", "")
            file_path = "Uploaded file: [%s] " % os.path.basename(
                file_path) if file_path else ""
            status = file_path  # + summary
        if status:
            self.progress_update_signal.emit(status)
        return True

    def serverProblemMessageBox(self, text, detail):
        msg = QMessageBox()
        msg.setIcon(QMessageBox.Warning)
        msg.setWindowTitle("Confirm Action")
        msg.setText(text)
        msg.setInformativeText(
            detail +
            "\n\nWould you like to remove this item from the current worklist?"
        )
        msg.setStandardButtons(QMessageBox.Yes | QMessageBox.No)
        ret = msg.exec_()
        if ret == QMessageBox.No:
            return
        else:
            row = self.ui.workList.getCurrentTableRow()
            self.ui.workList.removeRow(row)
            return

    def retrieveFiles(self):
        # if there is an existing segments file, download it first, otherwise just initiate the input file download
        seg_mode = self.ui.workList.getCurrentTableItemTextByName(
            "Segmentation Mode")
        segments_url = self.ui.workList.getCurrentTableItemTextByName(
            "Segments Filtered URL")
        if segments_url:
            segments_filename = 'ROI_%s_%s_only.csv' % (
                self.ui.workList.getCurrentTableItemTextByName("RID"),
                seg_mode)
            segments_destfile = os.path.abspath(
                os.path.join(self.tempdir, segments_filename))
            self.updateStatus("Downloading file: [%s]" % segments_destfile)
            downloadTask = FileRetrieveTask(self.store)
            downloadTask.status_update_signal.connect(
                self.onRetrieveAnalysisFileResult)
            self.progress_update_signal.connect(self.updateProgress)
            downloadTask.retrieve(segments_url,
                                  destfile=segments_destfile,
                                  progress_callback=self.downloadCallback)
        else:
            self.retrieveInputFile()

    def retrieveInputFile(self):
        # get the main TIFF file for analysis if not already cached
        if self.use_3D_viewer:
            url = self.ui.workList.getCurrentTableItemTextByName("URL")
            filename = 'Image_%s.ome.tiff' % self.ui.workList.getCurrentTableItemTextByName(
                "Source Image")
        else:
            url = self.ui.workList.getCurrentTableItemTextByName("Npz URL")
            filename = 'ROI_%s.npz' % self.ui.workList.getCurrentTableItemTextByName(
                "RID")
        destfile = os.path.abspath(os.path.join(self.getCacheDir(), filename))
        if not url and not self.use_3D_viewer:
            self.resetUI(
                "Unable to launch 2D viewer due to missing NPZ file for %s." %
                self.ui.workList.getCurrentTableItemTextByName("RID"))
            self.serverProblemMessageBox(
                "2D viewer requires NPZ data to be present!",
                "The launcher is currently configured to execute the 2D viewer, which requires NPZ files for input. "
                + "No NPZ file could be found on the server for this task.")
            return
        if not os.path.isfile(destfile):
            self.updateStatus("Downloading file: [%s]" % destfile)
            downloadTask = FileRetrieveTask(self.store)
            downloadTask.status_update_signal.connect(
                self.onRetrieveInputFileResult)
            self.progress_update_signal.connect(self.updateProgress)
            downloadTask.retrieve(url,
                                  destfile=destfile,
                                  progress_callback=self.downloadCallback)
        else:
            self.onRetrieveInputFileResult(
                True, "The file [%s] already exists" % destfile, None,
                destfile)

    def getSubprocessPath(self):
        executable = "synspy-viewer" if self.use_3D_viewer else "synspy-viewer2d"
        base_path = None
        return os.path.normpath(resource_path(executable, base_path))

    def executeViewer(self, file_path):
        self.updateStatus("Executing viewer...")
        env = os.environ
        env["SYNSPY_AUTO_DUMP_LOAD"] = "true"
        env["DUMP_PREFIX"] = "./ROI_%s" % self.ui.workList.getCurrentTableItemTextByName(
            "RID")
        env["ZYX_SLICE"] = self.ui.workList.getCurrentTableItemTextByName(
            "ZYX Slice")
        env["ZYX_IMAGE_GRID"] = "0.4, 0.26, 0.26"
        env["SYNSPY_DETECT_NUCLEI"] = str(
            "nucleic" == self.ui.workList.getCurrentTableItemTextByName(
                "Segmentation Mode")).lower()
        output_path = os.path.join(os.path.dirname(self.config_path),
                                   "viewer.log")
        classifier = self.ui.workList.getTableItemByName(
            self.ui.workList.getCurrentTableRow(),
            "Classifier").data(Qt.UserRole)
        viewerTask = ViewerTask(self.getSubprocessPath(),
                                self.identity == classifier,
                                proc_output_path=output_path)
        viewerTask.status_update_signal.connect(self.onSubprocessExecuteResult)
        viewerTask.run(file_path, self.tempdir, env)

    def uploadAnalysisResult(self, update_state):
        qApp.setOverrideCursor(Qt.WaitCursor)
        # generate hatrac upload params
        basename = "ROI_%s" % self.ui.workList.getCurrentTableItemTextByName(
            "RID")
        match = r"%s_.*\.csv$" % basename
        output_files = [
            f for f in os.listdir(self.tempdir)
            if os.path.isfile(os.path.join(self.tempdir, f))
            and re.match(match, f)
        ]
        if not output_files:
            self.resetUI(
                "Could not locate output file from viewer subprocess -- aborting."
            )
            return
        seg_mode = self.ui.workList.getCurrentTableItemTextByName(
            "Segmentation Mode")
        if seg_mode == "synaptic":
            extension = "_synaptic_only.csv"
        elif seg_mode == "nucleic":
            extension = "_nucleic_only.csv"
        else:
            self.updateStatus("Unknown segmentation mode \"%s\" -- aborting." %
                              seg_mode)
            return
        file_name = basename + extension
        hatrac_path = HATRAC_UPDATE_URL_TEMPLATE % \
            (self.ui.workList.getCurrentTableItemTextByName("Subject"), file_name)
        file_path = os.path.abspath(os.path.join(self.tempdir, file_name))

        # upload to object store
        self.updateStatus("Uploading file %s to server..." % file_name)
        self.progress_update_signal.connect(self.updateProgress)
        uploadTask = FileUploadTask(self.store)
        uploadTask.status_update_signal.connect(self.onUploadFileResult)
        uploadTask.upload(hatrac_path,
                          file_path,
                          update_state,
                          callback=self.uploadCallback)

    def markIncomplete(self):
        RID = self.ui.workList.getCurrentTableItemTextByName("RID")
        body = [{"RID": RID, "Status": "analysis in progress"}]
        self.updateStatus("Updating task status for %s..." % RID)
        updateTask = CatalogUpdateTask(self.catalog)
        updateTask.status_update_signal.connect(self.onCatalogUpdateResult)
        updateTask.update(WORKLIST_STATUS_UPDATE, json=body)

    @pyqtSlot()
    def taskTriggered(self):
        self.ui.logTextBrowser.widget.clear()
        self.disableControls()

    @pyqtSlot(str)
    def updateProgress(self, status):
        self.statusBar().showMessage(status)

    @pyqtSlot(str, str)
    def updateStatus(self, status, detail=None):
        logging.info(status + ((": %s" % detail) if detail else ""))
        self.statusBar().showMessage(status)

    @pyqtSlot(str, str)
    def resetUI(self, status, detail=None):
        qApp.restoreOverrideCursor()
        self.updateStatus(status, detail)
        self.enableControls()

    @pyqtSlot(str)
    def updateLog(self, text):
        self.ui.logTextBrowser.widget.appendPlainText(text)

    @pyqtSlot(bool, str, str, object)
    def onSessionResult(self, success, status, detail, result):
        qApp.restoreOverrideCursor()
        if success:
            self.identity = result["client"]["id"]
            self.attributes = result["attributes"]
            display_name = result["client"]["full_name"]
            self.setWindowTitle(
                "%s (%s - %s)" %
                (self.windowTitle(), self.server, display_name))
            self.ui.actionLaunch.setEnabled(True)
            self.ui.actionLogout.setEnabled(True)
            self.ui.actionLogin.setEnabled(False)
            if not self.is_curator():
                self.curator_mode = self.config["curator_mode"] = False
            self.on_actionRefresh_triggered()
        else:
            self.updateStatus("Login required.")

    @pyqtSlot()
    def on_actionLaunch_triggered(self):
        self.disableControls()
        qApp.setOverrideCursor(Qt.WaitCursor)
        # create working dir (tempdir)
        if self.tempdir:
            shutil.rmtree(self.tempdir)
        self.tempdir = tempfile.mkdtemp(prefix="synspy_")
        self.retrieveFiles()

    @pyqtSlot(bool, str, str, str)
    def onRetrieveAnalysisFileResult(self, success, status, detail, file_path):
        if not success:
            try:
                os.remove(file_path)
            except Exception as e:
                logging.warning("Unable to remove file [%s]: %s" %
                                (file_path, format_exception(e)))
            self.resetUI(status, detail)
            self.serverProblemMessageBox(
                "Unable to download required input file",
                "The in-progress analysis file was not downloaded successfully."
            )
            return

        self.retrieveInputFile()

    @pyqtSlot(bool, str, str, str)
    def onRetrieveInputFileResult(self, success, status, detail, file_path):
        if not success:
            try:
                os.remove(file_path)
            except Exception as e:
                logging.warning("Unable to remove file [%s]: %s" %
                                (file_path, format_exception(e)))
            self.resetUI(status, detail)
            self.serverProblemMessageBox(
                "Unable to download required input file",
                "The image input file was not downloaded successfully.")
            return

        self.executeViewer(file_path)

    @pyqtSlot(bool, str, str, bool)
    def onSubprocessExecuteResult(self, success, status, detail, is_owner):
        qApp.restoreOverrideCursor()
        if not success:
            self.resetUI(status, detail)
            return

        if not is_owner or self.curator_mode:
            self.resetUI(status, detail)
            return

        # prompt for save/complete/discard
        msg = QMessageBox()
        msg.setIcon(QMessageBox.Information)
        msg.setWindowTitle("Confirm Action")
        msg.setText("How would you like to proceed?")
        msg.setInformativeText(
            "Select \"Save Progress\" to save your progress and upload the output to the server.\n\n"
            "Select \"Complete\" to upload the output to the server and mark this task as completed.\n\n"
            "Select \"Discard\" to abort the process and leave the task state unchanged."
        )
        saveButton = msg.addButton("Save Progress", QMessageBox.ActionRole)
        completeButton = msg.addButton("Complete", QMessageBox.ActionRole)
        discardButton = msg.addButton("Discard", QMessageBox.RejectRole)
        msg.exec_()
        if msg.clickedButton() == discardButton:
            self.resetUI("Aborted.")
            return
        update_state = None
        if msg.clickedButton() == saveButton:
            update_state = ("incomplete", "analysis in progress")
        elif msg.clickedButton() == completeButton:
            update_state = ("complete", "analysis complete")

        self.uploadAnalysisResult(update_state)

    @pyqtSlot(bool, str, str, object)
    def onUploadFileResult(self, success, status, detail, result):
        if not success:
            self.resetUI(status, detail)
            self.serverProblemMessageBox(
                "Unable to upload required file(s)",
                "One or more required files were not uploaded successfully.")
            return
        state = result[0]
        RID = self.ui.workList.getCurrentTableItemTextByName("RID")
        body = [{
            "RID": RID,
            "Segments Filtered URL": result[1],
            "Status": state[1]
        }]
        self.updateStatus("Updating task status for %s..." % RID)
        updateTask = CatalogUpdateTask(self.catalog)
        updateTask.status_update_signal.connect(self.onCatalogUpdateResult)
        updateTask.update(WORKLIST_UPDATE, json=body)

    @pyqtSlot(bool, str, str, object)
    def onCatalogUpdateResult(self, success, status, detail, result):
        if not success:
            self.resetUI(status, detail)
            self.serverProblemMessageBox(
                "Unable to update catalog data",
                "The catalog state was not updated successfully.")
            return
        qApp.restoreOverrideCursor()
        self.on_actionRefresh_triggered()

    @pyqtSlot()
    def on_actionRefresh_triggered(self):
        if not self.identity:
            self.updateStatus("Unable to get worklist -- not logged in.")
            return
        qApp.setOverrideCursor(Qt.WaitCursor)
        self.disableControls()
        self.updateStatus("Refreshing worklist...")
        queryTask = CatalogQueryTask(self.catalog)
        queryTask.status_update_signal.connect(self.onRefreshResult)
        if self.is_curator() and self.curator_mode:
            queryTask.query(WORKLIST_CURATOR_QUERY)
        else:
            queryTask.query(WORKLIST_QUERY % urlquote(self.identity, ""))

    @pyqtSlot(bool, str, str, object)
    def onRefreshResult(self, success, status, detail, result):
        if success:
            self.displayWorklist(result)
            self.resetUI("Ready.")
        else:
            self.resetUI(status, detail)

        if (self.ui.workList.rowCount() > 0) and self.identity:
            self.ui.actionLaunch.setEnabled(True)
        else:
            self.ui.actionLaunch.setEnabled(False)

    @pyqtSlot()
    def on_actionLogin_triggered(self):
        self.authWindow.show()
        self.authWindow.login()

    @pyqtSlot()
    def on_actionLogout_triggered(self):
        self.authWindow.logout()
        self.setWindowTitle("%s %s" % (self.ui.title, synspy_version))
        self.ui.workList.clearContents()
        self.ui.workList.setRowCount(0)
        self.identity = None
        self.ui.actionLaunch.setEnabled(False)
        self.ui.actionLogout.setEnabled(False)
        self.ui.actionLogin.setEnabled(True)

    @pyqtSlot()
    def on_actionHelp_triggered(self):
        pass

    @pyqtSlot()
    def on_actionOptions_triggered(self):
        OptionsDialog.getOptions(self)

    @pyqtSlot()
    def on_actionExit_triggered(self):
        self.closeEvent()
        QCoreApplication.quit()
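Aside: each CatalogUpdateTask/CatalogQueryTask above ultimately reduces to a plain ERMrest call. A minimal sketch of the equivalent direct attributegroup update with deriva-py, under stated assumptions: the host and the attributegroup path below are placeholders, not the launcher's actual WORKLIST_STATUS_UPDATE constant, which is defined elsewhere.

from deriva.core import ErmrestCatalog, get_credential

# Hypothetical equivalent of CatalogUpdateTask.update(WORKLIST_STATUS_UPDATE, json=body);
# host and path are illustrative placeholders.
host = 'synapse.example.org'
catalog = ErmrestCatalog('https', host, 1, credentials=get_credential(host))
path = '/attributegroup/Zebrafish:Image%20Region/RID;Status'  # assumed path
catalog.put(path, json=[{'RID': '1-ABC', 'Status': 'analysis in progress'}])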
Example #17
class HatracClient (object):
    """
    Network client for hatrac.
    """
    ## Derived from the ermrest iobox service client

    def __init__(self, **kwargs):
        self.baseuri = kwargs.get("baseuri")
        o = urlparse.urlparse(self.baseuri)
        self.scheme = o[0]
        host_port = o[1].split(":")
        self.host = host_port[0]
        self.path = o.path
        self.port = None
        if len(host_port) > 1:
            self.port = host_port[1]
        self.cookie = kwargs.get("cookie")
        self.store = HatracStore(
            self.scheme, 
            self.host,
            {'cookie': self.cookie}
        )
        self.catalog = PollingErmrestCatalog(
            self.scheme, 
            self.host,
            self.path.split('/')[-1],
            {'cookie': self.cookie}
        )
        self.mail_server = kwargs.get("mail_server")
        self.mail_sender = kwargs.get("mail_sender")
        self.mail_receiver = kwargs.get("mail_receiver")
        self.logger = kwargs.get("logger")
        self.logger.debug('Hatrac Client initialized.')

    """
    Send email notification
    """
    def sendMail(self, subject, text):
        if self.mail_server and self.mail_sender and self.mail_receiver:
            retry = 0
            ready = False
            while not ready:
                try:
                    msg = MIMEText('%s\n\n%s' % (text, mail_footer), 'plain')
                    msg['Subject'] = subject
                    msg['From'] = self.mail_sender
                    msg['To'] = self.mail_receiver
                    s = smtplib.SMTP(self.mail_server)
                    s.sendmail(self.mail_sender, self.mail_receiver.split(','), msg.as_string())
                    s.quit()
                    self.logger.debug('Sent email notification.')
                    ready = True
                except socket.gaierror as e:
                    if e.errno == socket.EAI_AGAIN:
                        time.sleep(100)
                        retry = retry + 1
                        ready = retry > 10
                    else:
                        ready = True
                    if ready:
                        et, ev, tb = sys.exc_info()
                        self.logger.error('got exception "%s"' % str(ev))
                        self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
                except:
                    et, ev, tb = sys.exc_info()
                    self.logger.error('got exception "%s"' % str(ev))
                    self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
                    ready = True

    """
    Start the process for deleting files from hatrac
    """
    def start(self):
        try:
            self.deleteFromHatrac()
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Delete Hatrac: unexpected exception', '%s\nThe process might have been stopped\n' % str(traceback.format_exception(et, ev, tb)))
            raise
        
    """
    Delete videos from hatrac
    """
    def deleteFromHatrac(self):
        url = '/entity/Common:Delete_Hatrac/Hatrac_Deleted=FALSE/Processing_Status=in%20progress;Processing_Status::null::' 
        resp = self.catalog.get(url)
        resp.raise_for_status()
        files = resp.json()
        fileids = []
        for f in files:
            fileids.append((f['Hatrac_URI'], f['RID']))
                
        self.logger.debug('Deleting %d file(s) from hatrac.' % (len(fileids)))
        for hatrac_uri,rid in fileids:
            try:
                self.store.del_obj(hatrac_uri)
                self.logger.debug('SUCCEEDED: deleted file "%s" from hatrac.' % (hatrac_uri))
                columns = ["Hatrac_Deleted", "Processing_Status"]
                columns = ','.join([urlquote(col) for col in columns])
                url = '/attributegroup/Common:Delete_Hatrac/RID;%s' % (columns)
                obj = {'RID': rid,
                       'Hatrac_Deleted': True,
                       'Processing_Status': 'success'
                       }
                self.catalog.put(
                    url,
                    json=[obj]
                )
                self.logger.debug('SUCCEEDED: updated the Common:Delete_Hatrac entry for Hatrac URL "%s".' % (hatrac_uri))
            except Exception as e:
                et, ev, tb = sys.exc_info()
                self.logger.error('got exception "%s"' % str(ev))
                self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
                self.reportFailure(rid, str(e))
        
        
    """
    Update the Delete_Hatrac table with the ERROR status
    """
    def reportFailure(self, rid, error_message):
        """
            Update the Delete_Hatrac table with the failure result.
        """
        try:
            columns = ["Processing_Status"]
            columns = ','.join([urlquote(col) for col in columns])
            url = '/attributegroup/Common:Delete_Hatrac/RID;%s' % (columns)
            obj = {'RID': rid,
                   'Processing_Status': '%s' % error_message
                   }
            self.catalog.put(
                url,
                json=[obj]
            )
            self.logger.debug('SUCCEEDED: updated the Delete_Hatrac table for RID "%s" with Processing_Status "%s".' % (rid, error_message))
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Delete Hatrac: reportFailure ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
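A minimal sketch of how this client might be instantiated and run; every value below is a placeholder that a real deployment would supply via its service configuration:

import logging

logging.basicConfig(level=logging.DEBUG)

# Hypothetical driver for the HatracClient above; all values are illustrative.
client = HatracClient(
    baseuri='https://example.org/ermrest/catalog/1',  # scheme/host/catalog id are parsed from this
    cookie='webauthn=...',                            # authentication cookie for ERMrest/Hatrac
    mail_server='localhost',
    mail_sender='noreply@example.org',
    mail_receiver='admin@example.org',
    logger=logging.getLogger('delete_hatrac'))
client.start()  # scans Common:Delete_Hatrac and deletes the flagged objects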
Example #18
class DerivaDownload(object):
    """

    """
    def __init__(self, server,
                 output_dir=None, kwargs=None, config=None, config_file=None, credentials=None, credential_file=None):
        self.server = server
        self.hostname = None
        self.output_dir = output_dir if output_dir else "."
        self.envars = kwargs if kwargs else dict()
        self.catalog = None
        self.store = None
        self.config = config
        self.cancelled = False
        self.credentials = credentials if credentials else dict()
        self.metadata = dict()
        self.sessions = dict()

        info = "%s v%s [Python %s, %s]" % (
            self.__class__.__name__, VERSION, platform.python_version(), platform.platform(aliased=True))
        logging.info("Initializing downloader: %s" % info)

        if not self.server:
            raise RuntimeError("Server not specified!")

        # server variable initialization
        self.hostname = self.server.get('host', '')
        if not self.hostname:
            raise RuntimeError("Host not specified!")
        protocol = self.server.get('protocol', 'https')
        self.server_url = protocol + "://" + self.hostname
        catalog_id = self.server.get("catalog_id", "1")
        session_config = self.server.get('session')

        # credential initialization
        if credential_file:
            self.credentials = get_credential(self.hostname, credential_file)

        # catalog and file store initialization
        if self.catalog:
            del self.catalog
        self.catalog = ErmrestCatalog(
            protocol, self.hostname, catalog_id, self.credentials, session_config=session_config)
        if self.store:
            del self.store
        self.store = HatracStore(
            protocol, self.hostname, self.credentials, session_config=session_config)

        # process config file
        if config_file and os.path.isfile(config_file):
            self.config = read_config(config_file)

    def setConfig(self, config):
        self.config = config

    def setCredentials(self, credentials):
        self.catalog.set_credentials(credentials, self.hostname)
        self.store.set_credentials(credentials, self.hostname)
        self.credentials = credentials

    def download(self, identity=None):

        if not self.config:
            raise RuntimeError("No configuration specified!")

        if self.config.get("catalog") is None:
            raise RuntimeError("Catalog configuration error!")

        if not identity:
            logging.info("Validating credentials")
            try:
                if not self.credentials:
                    self.setCredentials(get_credential(self.hostname))
                attributes = self.catalog.get_authn_session().json()
                identity = attributes["client"]
            except Exception as e:
                raise RuntimeError("Unable to validate credentials: %s" % format_exception(e))

        ro_manifest = None
        ro_author_name = None
        ro_author_orcid = None
        remote_file_manifest = os.path.abspath(
            ''.join([os.path.join(self.output_dir, 'remote-file-manifest_'), str(uuid.uuid4()), ".json"]))

        catalog_config = self.config['catalog']
        self.envars.update(self.config.get('env', dict()))

        bag_path = None
        bag_archiver = None
        bag_algorithms = None
        bag_config = self.config.get('bag')
        create_bag = True if bag_config else False
        if create_bag:
            bag_name = bag_config.get('bag_name', ''.join(["deriva_bag", '_', time.strftime("%Y-%m-%d_%H.%M.%S")]))
            bag_path = os.path.abspath(os.path.join(self.output_dir, bag_name))
            bag_archiver = bag_config.get('bag_archiver')
            bag_algorithms = bag_config.get('bag_algorithms', ['sha256'])
            bag_metadata = bag_config.get('bag_metadata', {"Internal-Sender-Identifier":
                                                           "deriva@%s" % self.server_url})
            bag_ro = create_bag and stob(bag_config.get('bag_ro', "True"))
            if create_bag:
                bdb.ensure_bag_path_exists(bag_path)
                bag = bdb.make_bag(bag_path, algs=bag_algorithms, metadata=bag_metadata)
                if bag_ro:
                    ro_author_name = bag.info.get("Contact-Name",
                                                  identity.get('full_name',
                                                               identity.get('display_name',
                                                                            identity.get('id', None))))
                    ro_author_orcid = bag.info.get("Contact-Orcid")
                    ro_manifest = ro.init_ro_manifest(author_name=ro_author_name, author_orcid=ro_author_orcid)
                    bag_metadata.update({BAG_PROFILE_TAG: BDBAG_RO_PROFILE_ID})

        file_list = list()
        base_path = bag_path if bag_path else self.output_dir
        for query in catalog_config['queries']:
            query_path = query['query_path']
            output_format = query['output_format']
            output_processor = query.get("output_format_processor")
            format_args = query.get('output_format_params', None)
            output_path = query.get('output_path', '')

            try:
                download_processor = findProcessor(output_format, output_processor)
                processor = download_processor(self.envars,
                                               bag=create_bag,
                                               catalog=self.catalog,
                                               store=self.store,
                                               query=query_path,
                                               base_path=base_path,
                                               sub_path=output_path,
                                               format_args=format_args,
                                               remote_file_manifest=remote_file_manifest,
                                               ro_manifest=ro_manifest,
                                               ro_author_name=ro_author_name,
                                               ro_author_orcid=ro_author_orcid)
                file_list.extend(processor.process())
            except Exception as e:
                logging.error(format_exception(e))
                if create_bag:
                    bdb.cleanup_bag(bag_path)
                raise

        if create_bag:
            try:
                if ro_manifest:
                    ro.write_bag_ro_metadata(ro_manifest, bag_path)
                if not os.path.isfile(remote_file_manifest):
                    remote_file_manifest = None
                bdb.make_bag(bag_path, algs=bag_algorithms, remote_file_manifest=remote_file_manifest, update=True)
            except Exception as e:
                logging.fatal("Exception while updating bag manifests: %s", format_exception(e))
                bdb.cleanup_bag(bag_path)
                raise
            finally:
                if remote_file_manifest and os.path.isfile(remote_file_manifest):
                    os.remove(remote_file_manifest)

            logging.info('Created bag: %s' % bag_path)

            if bag_archiver is not None:
                try:
                    archive = bdb.archive_bag(bag_path, bag_archiver.lower())
                    bdb.cleanup_bag(bag_path)
                    return [archive]
                except Exception as e:
                    logging.error("Exception while creating data bag archive:", format_exception(e))
                    raise
            else:
                return [bag_path]

        return file_list
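A minimal sketch of driving this downloader, assuming a JSON config file whose 'catalog' section defines the 'queries' list consumed above (host and paths are placeholders):

# Hypothetical invocation; the server dict keys mirror __init__ above.
downloader = DerivaDownload(
    {'host': 'example.org', 'protocol': 'https', 'catalog_id': '1'},
    output_dir='/tmp/deriva-export',
    config_file='/path/to/download-config.json')
files = downloader.download()  # a list of files, or [bag_path] / [archive] when bagging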
Example #19
    def initialize(self, cleanup=False):
        info = "%s v%s [Python %s, %s]" % (self.__class__.__name__, VERSION,
                                           platform.python_version(),
                                           platform.platform(aliased=True))
        logging.info("Initializing uploader: %s" % info)

        # cleanup invalidates the current configuration and credentials in addition to clearing internal state
        if cleanup:
            self.cleanup()
        # reset just clears the internal state
        else:
            self.reset()

        if not self.server:
            logging.warning(
                "A server was not specified and an internal default has not been set."
            )
            return

        # server variable initialization
        protocol = self.server.get('protocol', 'https')
        host = self.server.get('host', '')
        self.server_url = protocol + "://" + host
        catalog_id = self.server.get("catalog_id", "1")
        session_config = self.server.get('session')

        # overridden credential initialization
        if self.override_credential_file:
            self.credentials = get_credential(host, self.override_credential_file)

        # catalog and file store initialization
        if self.catalog:
            del self.catalog
        self.catalog = ErmrestCatalog(protocol,
                                      host,
                                      catalog_id,
                                      self.credentials,
                                      session_config=session_config)
        if self.store:
            del self.store
        self.store = HatracStore(protocol,
                                 host,
                                 self.credentials,
                                 session_config=session_config)

        # transfer state initialization
        self.loadTransferState()
        """
         Configuration initialization - this is a bit complex because we allow for:
             1. Run-time overriding of the config file location.
             2. Sub-classes of this class to bundle their own default configuration files in an arbitrary location.
             3. The updating of already deployed configuration files if bundled internal defaults are newer.             
        """
        config_file = self.override_config_file if self.override_config_file else None
        # 1. If we don't already have a valid (i.e., overridden) path to a config file...
        if not (config_file and os.path.isfile(config_file)):
            # 2. Get the currently deployed config file path, which could possibly be overridden by subclass
            config_file = self.getDeployedConfigFilePath()
            # 3. If the deployed default path is not valid, OR, it is valid AND is older than the bundled default
            if (not (config_file and os.path.isfile(config_file))
                    or self.isFileNewer(self.getDefaultConfigFilePath(),
                                        self.getDeployedConfigFilePath())):
                # 4. If we can locate a bundled default config file,
                if os.path.isfile(self.getDefaultConfigFilePath()):
                    # 4.1 Copy the bundled default config file to the deployment-specific config path
                    copy_config(self.getDefaultConfigFilePath(), config_file)
                else:
                    # 4.2 Otherwise, fallback to writing a failsafe default based on internal hardcoded settings
                    write_config(config_file, DefaultConfig)
        # 5. Finally, read the resolved configuration file into a config object
        self._update_internal_config(read_config(config_file))
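The configuration bootstrap above condenses to the following precedence; a standalone sketch under stated assumptions (isFileNewer is approximated here by an mtime comparison, and write_failsafe stands in for write_config with the hardcoded defaults):

import os
import shutil

def resolve_config_path(override, deployed, bundled, write_failsafe):
    """Sketch: a runtime override wins; otherwise use the deployed copy,
    refreshed from a newer bundled default, else write failsafe defaults."""
    if override and os.path.isfile(override):
        return override  # 1. explicit run-time override
    bundled_is_newer = (os.path.isfile(bundled) and os.path.isfile(deployed) and
                        os.path.getmtime(bundled) > os.path.getmtime(deployed))
    if not os.path.isfile(deployed) or bundled_is_newer:
        if os.path.isfile(bundled):
            shutil.copy2(bundled, deployed)  # deploy/refresh the bundled default
        else:
            write_failsafe(deployed)         # fall back to internal defaults
    return deployed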
Example #20
class PyramidalClient (object):
    """Network client for generating pyramidal tiles.
    """
    ## Derived from the ermrest iobox service client

    def __init__(self, **kwargs):
        self.metadata = kwargs.get("metadata")
        self.baseuri = kwargs.get("baseuri")
        o = urlparse.urlparse(self.baseuri)
        self.scheme = o[0]
        host_port = o[1].split(":")
        self.host = host_port[0]
        self.path = o.path
        self.port = None
        if len(host_port) > 1:
            self.port = host_port[1]
        self.dzi = kwargs.get("dzi")
        self.thumbnails = kwargs.get("thumbnails")
        self.czi2dzi = kwargs.get("czi2dzi")
        self.viewer = kwargs.get("viewer")
        self.czirules = kwargs.get("czirules")
        self.showinf = kwargs.get("showinf")
        self.data_scratch = kwargs.get("data_scratch")
        self.cookie = kwargs.get("cookie")
        self.store = HatracStore(
            self.scheme, 
            self.host,
            {'cookie': self.cookie}
        )
        self.catalog = PollingErmrestCatalog(
            self.scheme, 
            self.host,
            self.path.split('/')[-1],
            {'cookie': self.cookie}
        )
        self.mail_server = kwargs.get("mail_server")
        self.mail_sender = kwargs.get("mail_sender")
        self.mail_receiver = kwargs.get("mail_receiver")
        self.logger = kwargs.get("logger")
        self.logger.debug('Client initialized.')

    """
    Send email notification
    """
    def sendMail(self, subject, text):
        if self.mail_server and self.mail_sender and self.mail_receiver:
            retry = 0
            ready = False
            while not ready:
                try:
                    msg = MIMEText('%s\n\n%s' % (text, mail_footer), 'plain')
                    msg['Subject'] = subject
                    msg['From'] = self.mail_sender
                    msg['To'] = self.mail_receiver
                    s = smtplib.SMTP(self.mail_server)
                    s.sendmail(self.mail_sender, self.mail_receiver.split(','), msg.as_string())
                    s.quit()
                    self.logger.debug('Sent email notification.')
                    ready = True
                except socket.gaierror as e:
                    if e.errno == socket.EAI_AGAIN:
                        time.sleep(100)
                        retry = retry + 1
                        ready = retry > 10
                    else:
                        ready = True
                    if ready:
                        et, ev, tb = sys.exc_info()
                        self.logger.error('got exception "%s"' % str(ev))
                        self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
                except:
                    et, ev, tb = sys.exc_info()
                    self.logger.error('got exception "%s"' % str(ev))
                    self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
                    ready = True

    """
    Start the process for generating pyramidal tiles
    """
    def start(self):
        try:
            self.processHistologicalImages()
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: unexpected exception', '%s\nThe process might have been stopped\n' % str(traceback.format_exception(et, ev, tb)))
            raise
        
    def processHistologicalImages(self):
        """
        Query for detecting new slides - the most recently first
        """
        url = '/entity/Histological_Images:HE_Slide/!File_Bytes::null::&Pyramid_URL::null::/Processing_Status=in%%20progress;Processing_Status::null::@sort(%s::desc::)' % (urlquote('RCT'))
        
        resp = self.catalog.get(url)
        resp.raise_for_status()
        slides = resp.json()
        slideids = []
        for slide in slides:
            slideids.append((slide['ID'], slide['Filename'], slide['File_URL'], slide['RCT'], slide['File_MD5'], slide['Name'], slide['RID']))
                
        self.logger.debug('Processing %d HistologicalImages slide(s).' % (len(slideids)))
                
        for slideId,filename,file_url,creation_time,md5,name,rid in slideids:
            self.logger.debug('Generating pyramidal tiles for the file "%s"' % (filename))
            
            """
            Extract the file from hatrac
            """
            f = self.getHatracFile(filename, file_url)
            
            if f == None:
                continue
            
            """
            Create the directory for the tiles
            """
            year = parse(creation_time).strftime("%Y")
            outdir = '%s/%s/%s' % (self.dzi, year, md5)
            if not os.path.exists(outdir):
                os.makedirs(outdir)
            
            """
            Convert the file to DZI
            """
            returncode = self.convert2dzi(f, outdir)
            
            if returncode != 0:
                """
                Update the slide table with the failure result.
                """
                self.updateAttributes('Histological_Images:HE_Slide',
                                     rid,
                                     ["Thumbnail", "Processing_Status"],
                                     {'RID': rid,
                                      'Thumbnail': '/thumbnails/generic/generic_genetic.png',
                                      'Processing_Status': 'czi2dzi error'
                                      })
                continue
            
            """
            Generate the thumbnail
            """
            thumbnail,urls = self.writeThumbnailImage(f, year, md5)
            
            if thumbnail == None:
                """
                Update the slide table with the failure result.
                """
                self.updateAttributes('Histological_Images:HE_Slide',
                                     rid,
                                     ["Thumbnail", "Processing_Status"],
                                     {'RID': rid,
                                      'Thumbnail': '/thumbnails/generic/generic_genetic.png',
                                      'Processing_Status': 'DZI failure'
                                      })
                continue
                
            """
            Extract the metadata
            """
            self.logger.debug('Extracting metadata for filename "%s"' % (filename)) 
            bioformatsClient = BioformatsClient(showinf=self.showinf, \
                                                czirules=self.czirules, \
                                                cziFile=f, \
                                                logger=self.logger)
            try:
                metadata = bioformatsClient.getMetadata()
                if metadata == None:
                    metadata = {}
                self.logger.debug('Metadata: "%s"' % str(metadata)) 
                os.remove('temp.xml')
            except XMLSyntaxError:
                et, ev, tb = sys.exc_info()
                self.logger.error('got unexpected exception "%s"' % str(ev))
                self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
                self.sendMail('FAILURE Tiles: XMLSyntaxError', '%s\n' % str(traceback.format_exception(et, ev, tb)))
                metadata = {}
                    
            os.remove(f)
            
            """
            Update the slide table with the success result.
            """
            self.updateAttributes('Histological_Images:HE_Slide',
                                 rid,
                                 ["Thumbnail","Pyramid_URL","Processing_Status","uri"],
                                 {'RID': rid,
                                   'Thumbnail': thumbnail,
                                   'Pyramid_URL': '/%s?%s' % (self.viewer, urls),
                                   'uri': '/%s?%s' % (self.viewer, urls),
                                   "Processing_Status": 'success'
                                  })
            
            self.logger.debug('SUCCEEDED: created the tiles directory for file "%s".' % (filename))
            
            """
            Update/Create the image entry with the metadata
            """
            obj = {}
            obj['ID'] = slideId
            obj['Name'] = name
            obj['url'] = '/chaise/viewer/#2/Histological_Images:HE_Slide/ID=%d' % slideId
            columns = ['ID', 'Name', 'url']
            for col in self.metadata:
                if col in metadata and metadata[col] != None:
                    columns.append(col)
                    obj[col] = metadata[col]
                    
            """
            Check if we have an update or create
            """
            rid = self.getRID('Histological_Images:HE_Image', 'ID=%d' % slideId)
            if rid != None:
                obj['RID'] = rid
                self.updateAttributes('Histological_Images:HE_Image',
                                     rid,
                                     columns,
                                     obj
                                      )
            else:
                self.createEntity('Histological_Images:HE_Image', obj)
                
            self.logger.debug('SUCCEEDED: created the image entry for file "%s".' % (filename))
            
        self.logger.debug('Ended HistologicalImages Slides Processing.') 
        
    """
    Extract the file from hatrac
    """
    def getHatracFile(self, filename, file_url):
        try:
            hatracFile = '%s/%s' % (self.data_scratch, filename)
            self.store.get_obj(file_url, destfilename=hatracFile)
            self.logger.debug('File "%s", %d bytes.' % (hatracFile, os.stat(hatracFile).st_size)) 
            return hatracFile
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: write thumbnail ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
            return None

    """
    Generate the thumbnail
    """
    def writeThumbnailImage(self, filename, year, md5):
        try:
            scanDir = '%s/%s/%s' % (self.dzi, year, md5)
            channels = []
            for channel in os.listdir(scanDir):
                if os.path.isdir('%s%s%s' % (scanDir, os.sep, channel)):
                    channels.append(channel)
            outdir = '%s/%s' % (self.thumbnails, year)
            if not os.path.exists(outdir):
                os.makedirs(outdir)
            shutil.copyfile('%s/%s/%s/%s/0/0_0.jpg' % (self.dzi, year, md5, channels[0]), '%s/%s.jpg' % (outdir, md5))
            thumbnail = '/thumbnails/%s/%s.jpg' % (urlquote(year), urlquote(md5))
            urls = []
            for channel in channels:
                urls.append('url=/data/%s/%s/%s/ImageProperties.xml' % (year, md5, channel))
            return (thumbnail, '&'.join(urls))
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: write thumbnail ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
            os.remove(filename)
            return (None, None)
            
    """
    Convert the input file to DZI
    """
    def convert2dzi(self, filename, outdir):
        # Initialize before the try block so the except handler below can
        # safely reference these names even if the subprocess fails early.
        currentDirectory = os.getcwd()
        stdoutdata = stderrdata = ''
        returncode = 1
        try:
            os.chdir(self.dzi)
            args = [self.czi2dzi, filename, outdir]
            p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdoutdata, stderrdata = p.communicate()
            returncode = p.returncode
            os.chdir(currentDirectory)
            
            if returncode != 0:
                self.logger.error('Can not convert czi to dzi for file "%s".\nstdoutdata: %s\nstderrdata: %s\n' % (filename, stdoutdata, stderrdata)) 
                self.sendMail('FAILURE Tiles', 'Can not convert czi to dzi for file "%s".\nstdoutdata: %s\nstderrdata: %s\n' % (filename, stdoutdata, stderrdata))
                os.remove(filename)
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: czi2dzi ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
            os.chdir(currentDirectory)
            self.logger.error('Can not generate pyramidal tiles for the file "%s".\nstdoutdata: %s\nstderrdata: %s\n' % (filename, stdoutdata, stderrdata)) 
            self.sendMail('FAILURE Tiles', 'Can not generate pyramidal tiles for the file "%s".\nstdoutdata: %s\nstderrdata: %s\n' % (filename, stdoutdata, stderrdata))
            returncode = 1
            
        return returncode
            
        
    """
    Update the ermrest attributes
    """
    def updateAttributes (self, path, rid, columns, row):
        """
        Update the ermrest attributes with the row values.
        """
        try:
            columns = ','.join([urlquote(col) for col in columns])
            url = '/attributegroup/%s/RID;%s' % (path, columns)
            resp = self.catalog.put(
                url,
                json=[row]
            )
            resp.raise_for_status()
            self.logger.debug('SUCCEEDED: updated table "%s" for RID "%s" with "%s".' % (path, rid, json.dumps(row, indent=4)))
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: reportFailure ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
            
    """
    Insert a row in a table
    """
    def createEntity (self, path, row):
        """
        Insert the row in the table.
        """
        try:
            url = '/entity/%s' % (path)
            resp = self.catalog.post(
                url,
                json=[row]
            )
            resp.raise_for_status()
            self.logger.debug('SUCCEEDED: created in table "%s" the entry "%s".' % (path, json.dumps(row, indent=4)))
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: reportFailure ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))

    """
    Check if an entry exist in the table
    Return the RID if it exists, and None otherwise
    """
    def getRID (self, path, predicate):
        """
        Get the RID of the row.
        """
        try:
            RID = None
            url = '/entity/%s/%s' % (path, predicate)
            resp = self.catalog.get(url)
            resp.raise_for_status()
            rows = resp.json()
            if len(rows) == 1:
                RID = rows[0]['RID']
            self.logger.debug('RID for the url = "%s" is "%s".' % (url, RID)) 
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: reportFailure ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
        
        return RID
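A minimal sketch of wiring up this tiling client; all values are illustrative placeholders normally supplied by the service configuration:

import logging

logging.basicConfig(level=logging.DEBUG)

# Hypothetical driver for the PyramidalClient above; every value is illustrative.
client = PyramidalClient(
    baseuri='https://example.org/ermrest/catalog/1',
    cookie='webauthn=...',
    dzi='/var/www/data',               # root directory for generated tiles
    thumbnails='/var/www/thumbnails',
    czi2dzi='/usr/local/bin/czi2dzi',  # external converter binary
    viewer='openseadragon-viewer/index.html',
    czirules='/etc/czirules.xml',
    showinf='/usr/local/bin/showinf',
    data_scratch='/var/scratch',
    metadata=[],                       # extra HE_Image columns to copy over
    logger=logging.getLogger('tiles'))
client.start()  # polls HE_Slide rows and generates DZI pyramids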
Example #21
servername = options.server
credentialsfilename = options.credentials
catalog = 1
schema = 'Microscopy'
table = 'Scan'
acquisition = 'Acquisition Date'
czi = 'HTTP URL'
rid = 'RID'
rct = 'RCT'
filename = 'filename'

credentials = json.load(open(credentialsfilename))
catalog = ErmrestCatalog('https', servername, catalog, credentials)

hatrac_store = HatracStore('https', servername,
                           {'cookie': credentials['cookie']})
url = '/attribute/%s:%s/%s::null::/%s,%s,%s,%s' % (
    urlquote(schema), urlquote(table), urlquote(acquisition), urlquote(rid),
    urlquote(rct), urlquote(filename), urlquote(czi))
print('Query URL: "%s"' % url)

resp = catalog.get(url)
resp.raise_for_status()
rows = resp.json()

entities = []
for row in rows:
    if options.skip:
        acquisitionDate = row[rct][:10]
    else:
        acquisitionDate = getAcquisitionDate(row)
Example #22
class DerivaDownload(object):
    """

    """
    def __init__(self, server, **kwargs):
        self.server = server
        self.hostname = None
        self.catalog = None
        self.store = None
        self.cancelled = False
        self.output_dir = os.path.abspath(kwargs.get("output_dir", "."))
        self.envars = kwargs.get("envars", dict())
        self.config = kwargs.get("config")
        self.credentials = kwargs.get("credentials", dict())
        config_file = kwargs.get("config_file")
        credential_file = kwargs.get("credential_file")
        self.metadata = dict()
        self.sessions = dict()

        info = "%s v%s [Python %s, %s]" % (
            self.__class__.__name__, get_installed_version(VERSION),
            platform.python_version(), platform.platform(aliased=True))
        logging.info("Initializing downloader: %s" % info)

        if not self.server:
            raise DerivaDownloadConfigurationError("Server not specified!")

        # server variable initialization
        self.hostname = self.server.get('host', '')
        if not self.hostname:
            raise DerivaDownloadConfigurationError("Host not specified!")
        protocol = self.server.get('protocol', 'https')
        self.server_url = protocol + "://" + self.hostname
        catalog_id = self.server.get("catalog_id", "1")
        session_config = self.server.get('session')

        # credential initialization
        token = kwargs.get("token")
        oauth2_token = kwargs.get("oauth2_token")
        username = kwargs.get("username")
        password = kwargs.get("password")
        if credential_file:
            self.credentials = get_credential(self.hostname, credential_file)
        elif token or oauth2_token or (username and password):
            self.credentials = format_credential(token=token,
                                                 oauth2_token=oauth2_token,
                                                 username=username,
                                                 password=password)

        # catalog and file store initialization
        if self.catalog:
            del self.catalog
        self.catalog = ErmrestCatalog(protocol,
                                      self.hostname,
                                      catalog_id,
                                      self.credentials,
                                      session_config=session_config)
        if self.store:
            del self.store
        self.store = HatracStore(protocol,
                                 self.hostname,
                                 self.credentials,
                                 session_config=session_config)

        # init dcctx cid to a default
        self.set_dcctx_cid(self.__class__.__name__)

        # process config file
        if config_file:
            try:
                self.config = read_config(config_file)
            except Exception as e:
                raise DerivaDownloadConfigurationError(e)

    def set_dcctx_cid(self, cid):
        assert cid, "A dcctx cid is required"
        if self.catalog:
            self.catalog.dcctx['cid'] = cid
        if self.store:
            self.store.dcctx['cid'] = cid

    def set_config(self, config):
        self.config = config

    def set_credentials(self, credentials):
        self.catalog.set_credentials(credentials, self.hostname)
        self.store.set_credentials(credentials, self.hostname)
        self.credentials = credentials

    def download(self, **kwargs):

        if not self.config:
            raise DerivaDownloadConfigurationError(
                "No configuration specified!")

        if self.config.get("catalog") is None:
            raise DerivaDownloadConfigurationError(
                "Catalog configuration error!")

        ro_manifest = None
        ro_author_name = None
        ro_author_orcid = None
        remote_file_manifest = os.path.abspath(''.join([
            os.path.join(self.output_dir, 'remote-file-manifest_'),
            str(uuid.uuid4()), ".json"
        ]))

        catalog_config = self.config['catalog']
        self.envars.update(self.config.get('env', dict()))
        self.envars.update({"hostname": self.hostname})

        # 1. If we don't have a client identity, we need to authenticate
        identity = kwargs.get("identity")
        if not identity:
            try:
                if not self.credentials:
                    self.set_credentials(get_credential(self.hostname))
                logging.info("Validating credentials for host: %s" %
                             self.hostname)
                attributes = self.catalog.get_authn_session().json()
                identity = attributes["client"]
            except HTTPError as he:
                if he.response.status_code == 404:
                    logging.info(
                        "No existing login session found for host: %s" %
                        self.hostname)
            except Exception as e:
                raise DerivaDownloadAuthenticationError(
                    "Unable to validate credentials: %s" % format_exception(e))
        wallet = kwargs.get("wallet", {})

        # 2. Check for bagging config and initialize bag related variables
        bag_path = None
        bag_archiver = None
        bag_algorithms = None
        bag_config = self.config.get('bag')
        create_bag = True if bag_config else False
        if create_bag:
            bag_name = bag_config.get(
                'bag_name', ''.join([
                    "deriva_bag", '_',
                    time.strftime("%Y-%m-%d_%H.%M.%S")
                ])).format(**self.envars)
            bag_path = os.path.abspath(os.path.join(self.output_dir, bag_name))
            bag_archiver = bag_config.get('bag_archiver')
            bag_algorithms = bag_config.get('bag_algorithms', ['sha256'])
            bag_metadata = bag_config.get(
                'bag_metadata',
                {"Internal-Sender-Identifier": "deriva@%s" % self.server_url})
            bag_ro = create_bag and stob(bag_config.get('bag_ro', "True"))
            if create_bag:
                bdb.ensure_bag_path_exists(bag_path)
                bag = bdb.make_bag(bag_path,
                                   algs=bag_algorithms,
                                   metadata=bag_metadata)
                if bag_ro:
                    ro_author_name = bag.info.get(
                        "Contact-Name", None if not identity else identity.get(
                            'full_name',
                            identity.get('display_name',
                                         identity.get('id', None))))
                    ro_author_orcid = bag.info.get("Contact-Orcid")
                    ro_manifest = ro.init_ro_manifest(
                        author_name=ro_author_name,
                        author_orcid=ro_author_orcid)
                    bag_metadata.update({BAG_PROFILE_TAG: BDBAG_RO_PROFILE_ID})

        # 3. Process the set of queries by locating, instantiating, and invoking the specified processor(s)
        outputs = dict()
        base_path = bag_path if bag_path else self.output_dir
        for processor in catalog_config['query_processors']:
            processor_name = processor["processor"]
            processor_type = processor.get('processor_type')
            processor_params = processor.get('processor_params')

            try:
                query_processor = find_query_processor(processor_name,
                                                       processor_type)
                processor = query_processor(
                    self.envars,
                    inputs=outputs,
                    bag=create_bag,
                    catalog=self.catalog,
                    store=self.store,
                    base_path=base_path,
                    processor_params=processor_params,
                    remote_file_manifest=remote_file_manifest,
                    ro_manifest=ro_manifest,
                    ro_author_name=ro_author_name,
                    ro_author_orcid=ro_author_orcid,
                    identity=identity,
                    wallet=wallet)
                outputs = processor.process()
            except Exception as e:
                logging.error(format_exception(e))
                if create_bag:
                    bdb.cleanup_bag(bag_path)
                raise

        # 4. Execute anything in the transform processing pipeline, if configured
        transform_processors = self.config.get('transform_processors', [])
        if transform_processors:
            for processor in transform_processors:
                processor_name = processor["processor"]
                processor_type = processor.get('processor_type')
                processor_params = processor.get('processor_params')
                try:
                    transform_processor = find_transform_processor(
                        processor_name, processor_type)
                    processor = transform_processor(
                        self.envars,
                        inputs=outputs,
                        processor_params=processor_params,
                        base_path=base_path,
                        bag=create_bag,
                        ro_manifest=ro_manifest,
                        ro_author_name=ro_author_name,
                        ro_author_orcid=ro_author_orcid,
                        identity=identity,
                        wallet=wallet)
                    outputs = processor.process()
                except Exception as e:
                    logging.error(format_exception(e))
                    raise

        # 5. Create the bag, and archive (serialize) if necessary
        if create_bag:
            try:
                if ro_manifest:
                    ro.write_bag_ro_metadata(ro_manifest, bag_path)
                if not os.path.isfile(remote_file_manifest):
                    remote_file_manifest = None
                bdb.make_bag(
                    bag_path,
                    algs=bag_algorithms,
                    remote_file_manifest=remote_file_manifest if
                    (remote_file_manifest
                     and os.path.getsize(remote_file_manifest) > 0) else None,
                    update=True)
            except Exception as e:
                logging.fatal("Exception while updating bag manifests: %s" %
                              format_exception(e))
                bdb.cleanup_bag(bag_path)
                raise
            finally:
                if remote_file_manifest and os.path.isfile(
                        remote_file_manifest):
                    os.remove(remote_file_manifest)

            logging.info('Created bag: %s' % bag_path)

            if bag_archiver is not None:
                try:
                    archive = bdb.archive_bag(bag_path, bag_archiver.lower())
                    bdb.cleanup_bag(bag_path)
                    outputs = {
                        os.path.basename(archive): {
                            LOCAL_PATH_KEY: archive
                        }
                    }
                except Exception as e:
                    logging.error(
                        "Exception while creating data bag archive: %s" %
                        format_exception(e))
                    raise
            else:
                outputs = {
                    os.path.basename(bag_path): {
                        LOCAL_PATH_KEY: bag_path
                    }
                }

        # 6. Execute anything in the post processing pipeline, if configured
        post_processors = self.config.get('post_processors', [])
        if post_processors:
            for processor in post_processors:
                processor_name = processor["processor"]
                processor_type = processor.get('processor_type')
                processor_params = processor.get('processor_params')
                try:
                    post_processor = find_post_processor(
                        processor_name, processor_type)
                    processor = post_processor(
                        self.envars,
                        inputs=outputs,
                        processor_params=processor_params,
                        identity=identity,
                        wallet=wallet)
                    outputs = processor.process()
                except Exception as e:
                    logging.error(format_exception(e))
                    raise

        return outputs

    def __del__(self):
        for session in self.sessions.values():
            session.close()
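For reference, a minimal sketch of the configuration shape download() consumes. The top-level keys ('catalog', 'query_processors', 'bag', and optionally 'transform_processors'/'post_processors') are exactly those read above; the processor name and its params are illustrative assumptions:

# Hypothetical config for DerivaDownload.download(); values are placeholders.
config = {
    'catalog': {
        'query_processors': [
            {
                'processor': 'csv',  # assumed processor name
                'processor_params': {
                    'query_path': '/entity/Zebrafish:Subject',  # illustrative query
                    'output_path': 'subjects'
                }
            }
        ]
    },
    'bag': {  # optional; its presence turns on bag creation
        'bag_name': 'deriva_bag_{hostname}',  # expanded against envars
        'bag_algorithms': ['sha256']
    }
}

downloader = DerivaDownload({'host': 'example.org'})  # placeholder host
downloader.set_config(config)
outputs = downloader.download()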
Example #23
class PyramidalClient(object):
    """Network client for generating pyramidal tiles.
    """

    ## Derived from the ermrest iobox service client

    def __init__(self, **kwargs):
        self.metadata = kwargs.get("metadata")
        self.baseuri = kwargs.get("baseuri")
        o = urlparse.urlparse(self.baseuri)
        self.scheme = o[0]
        host_port = o[1].split(":")
        self.host = host_port[0]
        self.path = o.path
        self.port = None
        if len(host_port) > 1:
            self.port = host_port[1]
        self.dzi = kwargs.get("dzi")
        self.thumbnails = kwargs.get("thumbnails")
        self.czi2dzi = kwargs.get("czi2dzi")
        self.viewer = kwargs.get("viewer")
        self.czirules = kwargs.get("czirules")
        self.showinf = kwargs.get("showinf")
        self.data_scratch = kwargs.get("data_scratch")
        self.cookie = kwargs.get("cookie")
        self.store = HatracStore(self.scheme, self.host,
                                 {'cookie': self.cookie})
        self.catalog = PollingErmrestCatalog(self.scheme, self.host,
                                             self.path.split('/')[-1],
                                             {'cookie': self.cookie})
        self.mail_server = kwargs.get("mail_server")
        self.mail_sender = kwargs.get("mail_sender")
        self.mail_receiver = kwargs.get("mail_receiver")
        self.logger = kwargs.get("logger")
        self.logger.debug('Client initialized.')

    """
    Send email notification
    """

    def sendMail(self, subject, text):
        if self.mail_server and self.mail_sender and self.mail_receiver:
            retry = 0
            ready = False
            while not ready:
                try:
                    msg = MIMEText('%s\n\n%s' % (text, mail_footer), 'plain')
                    msg['Subject'] = subject
                    msg['From'] = self.mail_sender
                    msg['To'] = self.mail_receiver
                    s = smtplib.SMTP(self.mail_server)
                    s.sendmail(self.mail_sender, self.mail_receiver.split(','),
                               msg.as_string())
                    s.quit()
                    self.logger.debug('Sent email notification.')
                    ready = True
                except socket.gaierror as e:
                    if e.errno == socket.EAI_AGAIN:
                        time.sleep(100)
                        retry = retry + 1
                        ready = retry > 10
                    else:
                        ready = True
                    if ready:
                        et, ev, tb = sys.exc_info()
                        self.logger.error('got exception "%s"' % str(ev))
                        self.logger.error(
                            '%s' % str(traceback.format_exception(et, ev, tb)))
                except:
                    et, ev, tb = sys.exc_info()
                    self.logger.error('got exception "%s"' % str(ev))
                    self.logger.error(
                        '%s' % str(traceback.format_exception(et, ev, tb)))
                    ready = True

    """
    Start the process for generating pyramidal tiles
    """

    def start(self):
        try:
            self.processHistologicalImages()
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' %
                              str(traceback.format_exception(et, ev, tb)))
            self.sendMail(
                'FAILURE Tiles: unexpected exception',
                '%s\nThe process might have been stopped\n' %
                str(traceback.format_exception(et, ev, tb)))
            raise

    def processHistologicalImages(self):
        """
        Query for detecting new slides - the most recently first
        """
        url = '/entity/Histological_Images:HE_Slide/!File_Bytes::null::&Pyramid_URL::null::/Processing_Status=in%%20progress;Processing_Status::null::@sort(%s::desc::)' % (
            urlquote('RCT'))

        resp = self.catalog.get(url)
        resp.raise_for_status()
        slides = resp.json()
        slideids = []
        for slide in slides:
            slideids.append(
                (slide['ID'], slide['Filename'], slide['File_URL'],
                 slide['RCT'], slide['File_MD5'], slide['Name'], slide['RID']))

        self.logger.debug('Processing %d HistologicalImages slide(s).' %
                          (len(slideids)))

        for slideId, filename, file_url, creation_time, md5, name, rid in slideids:
            self.logger.debug('Generating pyramidal tiles for the file "%s"' %
                              (filename))
            """
            Extract the file from hatrac
            """
            f = self.getHatracFile(filename, file_url)

            if f is None:
                continue
            """
            Create the directory for the tiles
            """
            year = parse(creation_time).strftime("%Y")
            outdir = '%s/%s/%s' % (self.dzi, year, md5)
            if not os.path.exists(outdir):
                os.makedirs(outdir)
            """
            Convert the file to DZI
            """
            returncode = self.convert2dzi(f, outdir)

            if returncode != 0:
                """
                Update the slide table with the failure result.
                """
                self.updateAttributes(
                    'Histological_Images:HE_Slide', rid,
                    ["Thumbnail", "Processing_Status"], {
                        'RID': rid,
                        'Thumbnail': '/thumbnails/generic/generic_genetic.png',
                        'Processing_Status': 'czi2dzi error'
                    })
                continue
            """
            Generate the thumbnail
            """
            thumbnail, urls = self.writeThumbnailImage(f, year, md5)

            if thumbnail is None:
                """
                Update the slide table with the failure result.
                """
                self.updateAttributes(
                    'Histological_Images:HE_Slide', rid,
                    ["Thumbnail", "Processing_Status"], {
                        'RID': rid,
                        'Thumbnail': '/thumbnails/generic/generic_genetic.png',
                        'Processing_Status': 'DZI failure'
                    })
                continue
            """
            Extract the metadata
            """
            self.logger.debug('Extracting metadata for filename "%s"' %
                              (filename))
            bioformatsClient = BioformatsClient(showinf=self.showinf,
                                                czirules=self.czirules,
                                                cziFile=f,
                                                logger=self.logger)
            try:
                metadata = bioformatsClient.getMetadata()
                if metadata is None:
                    metadata = {}
                self.logger.debug('Metadata: "%s"' % str(metadata))
                os.remove('temp.xml')
            except XMLSyntaxError:
                et, ev, tb = sys.exc_info()
                self.logger.error('got unexpected exception "%s"' % str(ev))
                self.logger.error('%s' %
                                  str(traceback.format_exception(et, ev, tb)))
                self.sendMail(
                    'FAILURE Tiles: XMLSyntaxError',
                    '%s\n' % str(traceback.format_exception(et, ev, tb)))
                metadata = {}

            os.remove(f)
            """
            Update the slide table with the success result.
            """
            self.updateAttributes(
                'Histological_Images:HE_Slide', rid,
                ["Thumbnail", "Pyramid_URL", "Processing_Status", "uri"], {
                    'RID': rid,
                    'Thumbnail': thumbnail,
                    'Pyramid_URL': '/%s?%s' % (self.viewer, urls),
                    'uri': '/%s?%s' % (self.viewer, urls),
                    "Processing_Status": 'success'
                })

            self.logger.debug(
                'SUCCEEDED: created the tiles directory for the file "%s".' %
                (filename))
            """
            Update/Create the image entry with the metadata
            """
            obj = {}
            obj['ID'] = slideId
            obj['Name'] = name
            obj['url'] = '/chaise/viewer/#2/Histological_Images:HE_Slide/ID=%d' % slideId
            columns = ['ID', 'Name', 'url']
            for col in self.metadata:
                if col in metadata and metadata[col] is not None:
                    columns.append(col)
                    obj[col] = metadata[col]
            """
            Check if we have an update or create
            """
            rid = self.getRID('Histological_Images:HE_Image',
                              'ID=%d' % slideId)
            if rid is not None:
                obj['RID'] = rid
                self.updateAttributes('Histological_Images:HE_Image', rid,
                                      columns, obj)
            else:
                self.createEntity('Histological_Images:HE_Image', obj)

            self.logger.debug(
                'SUCCEEDED: created the image entry for the file "%s".' %
                (filename))

        self.logger.debug('Ended HistologicalImages Slides Processing.')

    """
    Extract the file from hatrac
    """

    def getHatracFile(self, filename, file_url):
        try:
            hatracFile = '%s/%s' % (self.data_scratch, filename)
            self.store.get_obj(file_url, destfilename=hatracFile)
            self.logger.debug('File "%s", %d bytes.' %
                              (hatracFile, os.stat(hatracFile).st_size))
            return hatracFile
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' %
                              str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: get hatrac file ERROR',
                          '%s\n' % str(traceback.format_exception(et, ev, tb)))
            return None

    """
    Generate the thumbnail
    """

    def writeThumbnailImage(self, filename, year, md5):
        try:
            scanDir = '%s/%s/%s' % (self.dzi, year, md5)
            channels = []
            for channel in os.listdir(scanDir):
                if os.path.isdir('%s%s%s' % (scanDir, os.sep, channel)):
                    channels.append(channel)
            outdir = '%s/%s' % (self.thumbnails, year)
            if not os.path.exists(outdir):
                os.makedirs(outdir)
            shutil.copyfile(
                '%s/%s/%s/%s/0/0_0.jpg' % (self.dzi, year, md5, channels[0]),
                '%s/%s.jpg' % (outdir, md5))
            thumbnail = '/thumbnails/%s/%s.jpg' % (urlquote(year),
                                                   urlquote(md5))
            urls = []
            for channel in channels:
                urls.append('url=/data/%s/%s/%s/ImageProperties.xml' %
                            (year, md5, channel))
            return (thumbnail, '&'.join(urls))
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' %
                              str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: write thumbnail ERROR',
                          '%s\n' % str(traceback.format_exception(et, ev, tb)))
            os.remove(filename)
            return (None, None)

    """
    Convert the input file to DZI
    """

    def convert2dzi(self, filename, outdir):
        try:
            currentDirectory = os.getcwd()
            os.chdir(self.dzi)
            args = [self.czi2dzi, filename, outdir]
            p = subprocess.Popen(args,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            stdoutdata, stderrdata = p.communicate()
            returncode = p.returncode
            os.chdir(currentDirectory)

            if returncode != 0:
                self.logger.error(
                    'Cannot convert czi to dzi for file "%s".\nstdoutdata: %s\nstderrdata: %s\n'
                    % (filename, stdoutdata, stderrdata))
                self.sendMail(
                    'FAILURE Tiles',
                    'Cannot convert czi to dzi for file "%s".\nstdoutdata: %s\nstderrdata: %s\n'
                    % (filename, stdoutdata, stderrdata))
                os.remove(filename)
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' %
                              str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: czi2dzi ERROR',
                          '%s\n' % str(traceback.format_exception(et, ev, tb)))
            os.chdir(currentDirectory)
            self.logger.error(
                'Cannot generate pyramidal tiles for the file "%s".\nstdoutdata: %s\nstderrdata: %s\n'
                % (filename, stdoutdata, stderrdata))
            self.sendMail(
                'FAILURE Tiles',
                'Cannot generate pyramidal tiles for the file "%s".\nstdoutdata: %s\nstderrdata: %s\n'
                % (filename, stdoutdata, stderrdata))
            returncode = 1

        return returncode

    """
    Update the ermrest attributes
    """

    def updateAttributes(self, path, rid, columns, row):
        """
        Update the ermrest attributes with the row values.
        """
        try:
            columns = ','.join([urlquote(col) for col in columns])
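            # ERMrest attributegroup update: the RID before the ';' is the key
            # that locates the row; the columns listed after it are updated.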
            url = '/attributegroup/%s/RID;%s' % (path, columns)
            resp = self.catalog.put(url, json=[row])
            resp.raise_for_status()
            self.logger.debug(
                'SUCCEEDED: updated the table "%s" for the RID "%s" with "%s".'
                % (path, rid, json.dumps(row, indent=4)))
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got exception "%s"' % str(ev))
            self.logger.error('%s' %
                              str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: updateAttributes ERROR',
                          '%s\n' % str(traceback.format_exception(et, ev, tb)))

    """
    Insert a row in a table
    """

    def createEntity(self, path, row):
        """
        Insert the row in the table.
        """
        try:
            url = '/entity/%s' % (path)
            resp = self.catalog.post(url, json=[row])
            resp.raise_for_status()
            self.logger.debug(
                'SUCCEEDED: created in the table "%s" the entry "%s".' %
                (path, json.dumps(row, indent=4)))
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got exception "%s"' % str(ev))
            self.logger.error('%s' %
                              str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: createEntity ERROR',
                          '%s\n' % str(traceback.format_exception(et, ev, tb)))

    """
    Check if an entry exist in the table
    Return the RID if it exists, and None otherwise
    """

    def getRID(self, path, predicate):
        """
        Get the RID of the row.
        """
        try:
            RID = None
            url = '/entity/%s/%s' % (path, predicate)
            resp = self.catalog.get(url)
            resp.raise_for_status()
            rows = resp.json()
            if len(rows) == 1:
                RID = rows[0]['RID']
            self.logger.debug('RID for the url = "%s" is "%s".' % (url, RID))
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got exception "%s"' % str(ev))
            self.logger.error('%s' %
                              str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: getRID ERROR',
                          '%s\n' % str(traceback.format_exception(et, ev, tb)))

        return RID
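
A sketch of how this client might be wired up and run. Every concrete path, host, cookie, and mail setting below is illustrative; only the keyword names are taken from the __init__ above, and the baseuri shape matches the catalog-id parsing it performs.

import logging

logging.basicConfig(level=logging.DEBUG)
client = PyramidalClient(
    baseuri='https://example.org/ermrest/catalog/1',  # assumed server/catalog
    metadata=['Microscope', 'Objective'],             # image columns to copy
    dzi='/var/www/dzi',
    thumbnails='/var/www/thumbnails',
    czi2dzi='/usr/local/bin/czi2dzi',
    viewer='openseadragon/view.html',
    czirules='/etc/tiles/czirules.xml',
    showinf='/usr/local/bin/showinf',
    data_scratch='/var/scratch',
    cookie='webauthn=<session-token>',
    mail_server='localhost',
    mail_sender='tiles@example.org',
    mail_receiver='admin@example.org',
    logger=logging.getLogger('pyramidal'))
client.start()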
Example #24
class DerivaUpload(object):
    """
    Base class for upload tasks. Encapsulates a catalog instance and a hatrac store instance and provides some common
    and reusable functions.

    This class is not intended to be instantiated directly, but rather extended by a specific implementation.
    """

    DefaultConfigFileName = "config.json"
    DefaultServerListFileName = "servers.json"
    DefaultTransferStateFileName = "transfers.json"

    def __init__(self, config_file=None, credential_file=None, server=None):
        self.server_url = None
        self.catalog = None
        self.store = None
        self.config = None
        self.credentials = None
        self.asset_mappings = None
        self.transfer_state = dict()
        self.transfer_state_fp = None
        self.cancelled = False
        self.metadata = dict()

        self.file_list = OrderedDict()
        self.file_status = OrderedDict()
        self.skipped_files = set()
        self.override_config_file = config_file
        self.override_credential_file = credential_file
        self.server = self.getDefaultServer() if not server else server
        self.initialize()

    def __del__(self):
        self.cleanupTransferState()

    def initialize(self, cleanup=False):
        info = "%s v%s [Python %s, %s]" % (self.__class__.__name__, VERSION,
                                           platform.python_version(),
                                           platform.platform(aliased=True))
        logging.info("Initializing uploader: %s" % info)

        # cleanup invalidates the current configuration and credentials in addition to clearing internal state
        if cleanup:
            self.cleanup()
        # reset just clears the internal state
        else:
            self.reset()

        if not self.server:
            logging.warning(
                "A server was not specified and an internal default has not been set."
            )
            return

        # server variable initialization
        protocol = self.server.get('protocol', 'https')
        host = self.server.get('host', '')
        self.server_url = protocol + "://" + host
        catalog_id = self.server.get("catalog_id", "1")
        session_config = self.server.get('session')

        # overridden credential initialization
        if self.override_credential_file:
            self.credentials = get_credential(host, self.override_config_file)

        # catalog and file store initialization
        if self.catalog:
            del self.catalog
        self.catalog = ErmrestCatalog(protocol,
                                      host,
                                      catalog_id,
                                      self.credentials,
                                      session_config=session_config)
        if self.store:
            del self.store
        self.store = HatracStore(protocol,
                                 host,
                                 self.credentials,
                                 session_config=session_config)

        # transfer state initialization
        self.loadTransferState()
        """
         Configuration initialization - this is a bit complex because we allow for:
             1. Run-time overriding of the config file location.
             2. Sub-classes of this class to bundle their own default configuration files in an arbitrary location.
             3. The updating of already deployed configuration files if bundled internal defaults are newer.             
        """
        config_file = self.override_config_file if self.override_config_file else None
        # 1. If we don't already have a valid (i.e., overridden) path to a config file...
        if not (config_file and os.path.isfile(config_file)):
            # 2. Get the currently deployed config file path, which could possibly be overridden by subclass
            config_file = self.getDeployedConfigFilePath()
            # 3. If the deployed default path is not valid, OR, it is valid AND is older than the bundled default
            if (not (config_file and os.path.isfile(config_file))
                    or self.isFileNewer(self.getDefaultConfigFilePath(),
                                        self.getDeployedConfigFilePath())):
                # 4. If we can locate a bundled default config file,
                if os.path.isfile(self.getDefaultConfigFilePath()):
                    # 4.1 Copy the bundled default config file to the deployment-specific config path
                    copy_config(self.getDefaultConfigFilePath(), config_file)
                else:
                    # 4.2 Otherwise, fallback to writing a failsafe default based on internal hardcoded settings
                    write_config(config_file, DefaultConfig)
        # 5. Finally, read the resolved configuration file into a config object
        self._update_internal_config(read_config(config_file))

    def _update_internal_config(self, config):
        """This updates the internal state of the uploader based on the config.
        """
        self.config = config
        # uploader initialization from configuration
        self.asset_mappings = self.config.get('asset_mappings', [])
        mu.add_types(self.config.get('mime_overrides'))

    def cancel(self):
        self.cancelled = True

    def reset(self):
        self.metadata.clear()
        self.file_list.clear()
        self.file_status.clear()
        self.skipped_files.clear()
        self.cancelled = False

    def cleanup(self):
        self.reset()
        self.config = None
        self.credentials = None
        self.cleanupTransferState()

    def setServer(self, server):
        cleanup = self.server != server
        self.server = server
        self.initialize(cleanup)

    def setCredentials(self, credentials):
        host = self.server['host']
        self.credentials = credentials
        self.catalog.set_credentials(self.credentials, host)
        self.store.set_credentials(self.credentials, host)

    @classmethod
    def getDefaultServer(cls):
        servers = cls.getServers()
        for server in servers:
            lower = {k.lower(): v for k, v in server.items()}
            if lower.get("default", False):
                return server
        return servers[0] if len(servers) else {}

    @classmethod
    def getServers(cls):
        """
        This method must be implemented by subclasses.
        """
        raise NotImplementedError(
            "This method must be implemented by a subclass.")

    @classmethod
    def getVersion(cls):
        """
        This method must be implemented by subclasses.
        """
        raise NotImplementedError(
            "This method must be implemented by a subclass.")

    @classmethod
    def getConfigPath(cls):
        """
        This method must be implemented by subclasses.
        """
        raise NotImplementedError(
            "This method must be implemented by a subclass.")

    @classmethod
    def getDeployedConfigPath(cls):
        return os.path.expanduser(os.path.normpath(cls.getConfigPath()))

    def getVersionCompatibility(self):
        return self.config.get("version_compatibility", list())

    def isVersionCompatible(self):
        compatibility = self.getVersionCompatibility()
        if len(compatibility) > 0:
            return vu.is_compatible(self.getVersion(), compatibility)
        else:
            return True

    @classmethod
    def getFileDisplayName(cls, file_path, asset_mapping=None):
        return os.path.basename(file_path)

    @staticmethod
    def isFileNewer(src, dst):
        if not (os.path.isfile(src) and os.path.isfile(dst)):
            return False

        # This comparison won't work with PyInstaller single-file bundles because the bundle is extracted to a temp dir
        # and every timestamp for every file in the bundle is reset to the bundle extraction/creation time.
        if getattr(sys, 'frozen', False):
            prefix = os.path.sep + "_MEI"
            if prefix in src:
                return False

        src_mtime = os.path.getmtime(os.path.abspath(src))
        dst_mtime = os.path.getmtime(os.path.abspath(dst))
        return src_mtime > dst_mtime

    @staticmethod
    def getFileSize(file_path):
        return os.path.getsize(file_path)

    @staticmethod
    def guessContentType(file_path):
        return mu.guess_content_type(file_path)

    @staticmethod
    def getFileHashes(file_path, hashes=frozenset(['md5'])):
        return hu.compute_file_hashes(file_path, hashes)

    @staticmethod
    def getCatalogTable(asset_mapping, metadata_dict=None):
        schema_name, table_name = asset_mapping.get('target_table',
                                                    [None, None])
        if not (schema_name and table_name):
            metadata_dict_lower = {
                k.lower(): v
                for k, v in metadata_dict.items()
            }
            schema_name = metadata_dict_lower.get("schema")
            table_name = metadata_dict_lower.get("table")
        if not (schema_name and table_name):
            raise ValueError(
                "Unable to determine target catalog table for asset type.")
        return '%s:%s' % (urlquote(schema_name), urlquote(table_name))

    @staticmethod
    def interpolateDict(src, dst, allowNone=False):
        if not (isinstance(src, dict) and isinstance(dst, dict)):
            raise ValueError(
                "Invalid input parameter type(s): (src = %s, dst = %s), expected (dict, dict)"
                % (type(src).__name__, type(dst).__name__))

        dst = dst.copy()
        # prune None values from the src, we don't want those to be replaced with the string 'None' in the dest
        empty = [k for k, v in src.items() if v is None]
        for k in empty:
            del src[k]
        # perform the string replacement for the values in the destination dict
        for k, v in dst.items():
            try:
                value = v.format(**src)
            except KeyError:
                value = v
                if value:
                    if value.startswith('{') and value.endswith('}'):
                        value = None
            dst.update({k: value})
        # remove all None valued entries in the dest, if disallowed
        if not allowNone:
            empty = [k for k, v in dst.items() if v is None]
            for k in empty:
                del dst[k]

        return dst

    @staticmethod
    def pruneDict(src, dst, stringify=True):
        dst = dst.copy()
        for k in dst.keys():
            value = src.get(k)
            dst[k] = str(value) if (stringify and value is not None) else value
        return dst

    def getCurrentConfigFilePath(self):
        return self.override_config_file if self.override_config_file else self.getDeployedConfigFilePath()

    def getDefaultConfigFilePath(self):
        return os.path.normpath(
            resource_path(os.path.join("conf", self.DefaultConfigFileName)))

    def getDeployedConfigFilePath(self):
        return os.path.join(self.getDeployedConfigPath(),
                            self.server.get('host', ''),
                            self.DefaultConfigFileName)

    def getDeployedTransferStateFilePath(self):
        return os.path.join(self.getDeployedConfigPath(),
                            self.server.get('host', ''),
                            self.DefaultTransferStateFileName)

    def getRemoteConfig(self):
        catalog_config = CatalogConfig.fromcatalog(self.catalog)
        return catalog_config.annotation_obj(
            "tag:isrd.isi.edu,2017:bulk-upload")

    def getUpdatedConfig(self):
        # if we are using an overridden config file, skip the update check
        if self.override_config_file:
            return

        logging.info("Checking for updated configuration...")
        remote_config = self.getRemoteConfig()
        if not remote_config:
            logging.info(
                "Remote configuration not present, using default local configuration file."
            )
            return

        deployed_config_file_path = self.getDeployedConfigFilePath()
        if os.path.isfile(deployed_config_file_path):
            current_md5 = hu.compute_file_hashes(deployed_config_file_path,
                                                 hashes=['md5'])['md5'][0]
        else:
            logging.info("Local config not found.")
            current_md5 = None
        tempdir = tempfile.mkdtemp(prefix="deriva_upload_")
        if os.path.exists(tempdir):
            updated_config_path = os.path.abspath(
                os.path.join(tempdir, DerivaUpload.DefaultConfigFileName))
            with io.open(updated_config_path,
                         'w',
                         newline='\n',
                         encoding='utf-8') as config:
                config.write(
                    json.dumps(remote_config,
                               ensure_ascii=False,
                               sort_keys=True,
                               separators=(',', ': '),
                               indent=2))
            new_md5 = hu.compute_file_hashes(updated_config_path,
                                             hashes=['md5'])['md5'][0]
            if current_md5 != new_md5:
                logging.info("Updated configuration found.")
                config = read_config(updated_config_path)
                self._update_internal_config(config)
            else:
                logging.info("Configuration is up-to-date.")
                config = None
            shutil.rmtree(tempdir, ignore_errors=True)

            return config

    def getFileStatusAsArray(self):
        result = list()
        for key in self.file_status.keys():
            item = {"File": key}
            item.update(self.file_status[key])
            result.append(item)
        return result

    def validateFile(self, root, path, name):
        file_path = os.path.normpath(os.path.join(path, name))
        asset_group, asset_mapping, groupdict = self.getAssetMapping(file_path)
        if not asset_mapping:
            return None

        return asset_group, asset_mapping, groupdict, file_path

    def scanDirectory(self, root, abort_on_invalid_input=False):
        """

        :param root:
        :param abort_on_invalid_input:
        :return:
        """
        root = os.path.abspath(root)
        if not os.path.isdir(root):
            raise ValueError("Invalid directory specified: [%s]" % root)

        logging.info("Scanning files in directory [%s]..." % root)
        file_list = OrderedDict()
        for path, dirs, files in walk(root):
            for file_name in files:
                file_path = os.path.normpath(os.path.join(path, file_name))
                file_entry = self.validateFile(root, path, file_name)
                if not file_entry:
                    logging.info(
                        "Skipping file: [%s] -- Invalid file type or directory location."
                        % file_path)
                    self.skipped_files.add(file_path)
                    if abort_on_invalid_input:
                        raise ValueError("Invalid input detected, aborting.")
                else:
                    asset_group = file_entry[0]
                    group_list = file_list.get(asset_group, [])
                    group_list.append(file_entry)
                    file_list[asset_group] = group_list

        # make sure that file entries in both self.file_list and self.file_status are ordered by the declared order of
        # the asset_mapping for the file
        for group in sorted(file_list.keys()):
            self.file_list[group] = file_list[group]
            for file_entry in file_list[group]:
                file_path = file_entry[3]
                logging.info("Including file: [%s]." % file_path)
                status = self.getTransferStateStatus(file_path)
                if status:
                    self.file_status[file_path] = FileUploadState(
                        UploadState.Paused, status)._asdict()
                else:
                    self.file_status[file_path] = FileUploadState(
                        UploadState.Pending, "Pending")._asdict()

    def getAssetMapping(self, file_path):
        """
        :param file_path:
        :return:
        """
        asset_group = -1
        for asset_type in self.asset_mappings:
            asset_group += 1
            groupdict = dict()
            dir_pattern = asset_type.get('dir_pattern', '')
            ext_pattern = asset_type.get('ext_pattern', '')
            file_pattern = asset_type.get('file_pattern', '')
            path = file_path.replace("\\", "/")
            if dir_pattern:
                match = re.search(dir_pattern, path)
                if not match:
                    logging.debug(
                        "The dir_pattern \"%s\" failed to match the input path [%s]"
                        % (dir_pattern, path))
                    continue
                groupdict.update(match.groupdict())
            if ext_pattern:
                match = re.search(ext_pattern, path, re.IGNORECASE)
                if not match:
                    logging.debug(
                        "The ext_pattern \"%s\" failed to match the input path [%s]"
                        % (ext_pattern, path))
                    continue
                groupdict.update(match.groupdict())
            if file_pattern:
                match = re.search(file_pattern, path)
                if not match:
                    logging.debug(
                        "The file_pattern \"%s\" failed to match the input path [%s]"
                        % (file_pattern, path))
                    continue
                groupdict.update(match.groupdict())

            return asset_group, asset_type, groupdict

        return None, None, None
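
    # A hypothetical asset_mappings entry that getAssetMapping() above would
    # match (every regex below is invented for illustration):
    #   {
    #     "dir_pattern": "^.*/(?P<experiment>[^/]+)/images$",
    #     "ext_pattern": "\\.czi$",
    #     "file_pattern": "(?P<basename>[^/]+)[.]czi$",
    #     "target_table": ["Imaging", "Scan"]
    #   }
    # Named groups from all matching patterns are merged into the groupdict
    # that later feeds the metadata and hatrac templates.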

    def uploadFiles(self, status_callback=None, file_callback=None):
        for group, assets in self.file_list.items():
            for asset_group_num, asset_mapping, groupdict, file_path in assets:
                if self.cancelled:
                    self.file_status[file_path] = FileUploadState(
                        UploadState.Cancelled, "Cancelled by user")._asdict()
                    continue
                try:
                    self.file_status[file_path] = FileUploadState(
                        UploadState.Running, "In-progress")._asdict()
                    if status_callback:
                        status_callback()
                    self.uploadFile(file_path, asset_mapping, groupdict,
                                    file_callback)
                    self.file_status[file_path] = FileUploadState(
                        UploadState.Success, "Complete")._asdict()
                except HatracJobPaused:
                    status = self.getTransferStateStatus(file_path)
                    if status:
                        self.file_status[file_path] = FileUploadState(
                            UploadState.Paused,
                            "Paused: %s" % status)._asdict()
                    continue
                except HatracJobTimeout:
                    status = self.getTransferStateStatus(file_path)
                    if status:
                        self.file_status[file_path] = FileUploadState(
                            UploadState.Timeout, "Timeout")._asdict()
                    continue
                except HatracJobAborted:
                    self.file_status[file_path] = FileUploadState(
                        UploadState.Aborted, "Aborted by user")._asdict()
                except:
                    etype, value, tb = sys.exc_info()
                    self.file_status[file_path] = FileUploadState(
                        UploadState.Failed, format_exception(value))._asdict()
                self.delTransferState(file_path)
                if status_callback:
                    status_callback()

        failed_uploads = dict()
        for key, value in self.file_status.items():
            if (value["State"]
                    == UploadState.Failed) or (value["State"]
                                               == UploadState.Timeout):
                failed_uploads[key] = value["Status"]

        if self.skipped_files:
            logging.warning(
                "The following file(s) were skipped because they did not satisfy the matching criteria "
                "of the configuration:\n\n%s\n" %
                '\n'.join(sorted(self.skipped_files)))

        if failed_uploads:
            logging.warning(
                "The following file(s) failed to upload due to errors:\n\n%s\n"
                % '\n'.join([
                    "%s -- %s" % (key, failed_uploads[key])
                    for key in sorted(failed_uploads.keys())
                ]))
            raise RuntimeError(
                "One or more file(s) failed to upload due to errors.")

    def uploadFile(self,
                   file_path,
                   asset_mapping,
                   match_groupdict,
                   callback=None):
        """
        Primary API subclass function.
        :param file_path:
        :param asset_mapping:
        :param match_groupdict:
        :param callback:
        :return:
        """
        logging.info("Processing file: [%s]" % file_path)

        if asset_mapping.get("asset_type", "file") == "table":
            self._uploadTable(file_path, asset_mapping, match_groupdict)
        else:
            self._uploadAsset(file_path, asset_mapping, match_groupdict,
                              callback)

    def _uploadAsset(self,
                     file_path,
                     asset_mapping,
                     match_groupdict,
                     callback=None):

        # 1. Populate metadata by querying the catalog
        self._queryFileMetadata(file_path, asset_mapping, match_groupdict)

        # 2. If "create_record_before_upload" specified in asset_mapping, check for an existing record, creating a new
        #    one if necessary. Otherwise delay this logic until after the file upload.
        record = None
        if stob(asset_mapping.get("create_record_before_upload", False)):
            record = self._getFileRecord(asset_mapping)

        # 3. Perform the Hatrac upload
        self._getFileHatracMetadata(asset_mapping)
        hatrac_options = asset_mapping.get("hatrac_options", {})
        versioned_uri = \
            self._hatracUpload(self.metadata["URI"],
                               file_path,
                               md5=self.metadata.get("md5_base64"),
                               sha256=self.metadata.get("sha256_base64"),
                               content_type=self.guessContentType(file_path),
                               content_disposition=self.metadata.get("content-disposition"),
                               chunked=True,
                               create_parents=stob(hatrac_options.get("create_parents", True)),
                               allow_versioning=stob(hatrac_options.get("allow_versioning", True)),
                               callback=callback)
        logging.debug("Hatrac upload successful. Result object URI: %s" %
                      versioned_uri)
        if stob(hatrac_options.get("versioned_uris", True)):
            self.metadata["URI"] = versioned_uri
        else:
            self.metadata["URI"] = versioned_uri.rsplit(":")[0]
        self.metadata["URI_urlencoded"] = urlquote(self.metadata["URI"])

        # 4. Check for an existing record and create a new one if necessary
        if not record:
            record = self._getFileRecord(asset_mapping)

        # 5. Update an existing record, if necessary
        column_map = asset_mapping.get("column_map", {})
        updated_record = self.interpolateDict(self.metadata, column_map)
        if updated_record != record:
            logging.info("Updating catalog for file [%s]" %
                         self.getFileDisplayName(file_path))
            self._catalogRecordUpdate(self.metadata['target_table'], record,
                                      updated_record)

    def _uploadTable(self,
                     file_path,
                     asset_mapping,
                     match_groupdict,
                     callback=None):
        if self.cancelled:
            return None

        self._initFileMetadata(file_path, asset_mapping, match_groupdict)
        try:
            default_columns = asset_mapping.get("default_columns")
            if not default_columns:
                default_columns = self.catalog.getDefaultColumns(
                    {}, self.metadata['target_table'])
            default_param = (
                '?defaults=%s' %
                ','.join(default_columns)) if len(default_columns) > 0 else ''
            file_ext = self.metadata['file_ext']
            if file_ext == 'csv':
                headers = {'content-type': 'text/csv'}
            elif file_ext == 'json':
                headers = {'content-type': 'application/json'}
            else:
                raise CatalogCreateError(
                    "Unsupported file type for catalog bulk upload: %s" %
                    file_ext)
            with open(file_path, "rb") as fp:
                result = self.catalog.post(
                    '/entity/%s%s' %
                    (self.metadata['target_table'], default_param),
                    fp,
                    headers=headers)
                return result
        except:
            etype, value, tb = sys.exc_info()
            raise CatalogCreateError(format_exception(value))

    def _getFileRecord(self, asset_mapping):
        """
        Helper function that queries the catalog to get a record linked to the asset, or create it if it doesn't exist.
        :return: the file record
        """
        column_map = asset_mapping.get("column_map", {})
        rqt = asset_mapping['record_query_template']
        try:
            path = rqt.format(**self.metadata)
        except KeyError as e:
            raise ConfigurationError(
                "Record query template substitution error: %s" %
                format_exception(e))
        result = self.catalog.get(path).json()
        if result:
            self._updateFileMetadata(result[0])
            return self.pruneDict(result[0], column_map)
        else:
            row = self.interpolateDict(self.metadata, column_map)
            result = self._catalogRecordCreate(self.metadata['target_table'],
                                               row)
            if result:
                self._updateFileMetadata(result[0])
            return self.interpolateDict(self.metadata,
                                        column_map,
                                        allowNone=True)

    def _urlEncodeMetadata(self, safe_overrides=None):
        urlencoded = dict()
        if not safe_overrides:
            safe_overrides = dict()
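        # e.g. {'file_name': 'my file.czi'} gains
        # {'file_name_urlencoded': 'my%20file.czi'}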
        for k, v in self.metadata.items():
            if k.endswith("_urlencoded"):
                continue
            urlencoded[k + "_urlencoded"] = urlquote(str(v),
                                                     safe_overrides.get(k, ""))
        self._updateFileMetadata(urlencoded)

    def _initFileMetadata(self, file_path, asset_mapping, match_groupdict):
        self.metadata.clear()
        self._updateFileMetadata(match_groupdict)

        self.metadata['target_table'] = self.getCatalogTable(
            asset_mapping, match_groupdict)
        self.metadata["file_name"] = self.getFileDisplayName(file_path)
        self.metadata["file_size"] = self.getFileSize(file_path)

        self._urlEncodeMetadata(
            asset_mapping.get("url_encoding_safe_overrides"))

    def _updateFileMetadata(self, src, strict=False):
        if not isinstance(src, dict):
            raise ValueError(
                "Invalid input parameter type(s): (src = %s), expected (dict)"
                % type(src).__name__)
        if strict:
            for k in list(src.keys()):
                if k in UploadMetadataReservedKeyNames:
                    logging.warning(
                        "Context metadata update specified reserved key name [%s], "
                        "ignoring value: %s " % (k, src[k]))
                    del src[k]
        self.metadata.update(src)

    def _queryFileMetadata(self, file_path, asset_mapping, match_groupdict):
        """
        Helper function that queries the catalog to get required metadata for a given file/asset
        """
        file_name = self.getFileDisplayName(file_path)
        logging.info("Computing metadata for file: [%s]." % file_name)
        self._initFileMetadata(file_path, asset_mapping, match_groupdict)

        logging.info("Computing checksums for file: [%s]. Please wait..." %
                     file_name)
        hashes = self.getFileHashes(
            file_path, asset_mapping.get('checksum_types', ['md5', 'sha256']))
        for alg, checksum in hashes.items():
            alg = alg.lower()
            self.metadata[alg] = checksum[0]
            self.metadata[alg + "_base64"] = checksum[1]

        for uri in asset_mapping.get("metadata_query_templates", []):
            try:
                path = uri.format(**self.metadata)
            except KeyError as e:
                raise RuntimeError(
                    "Metadata query template substitution error: %s" %
                    format_exception(e))
            result = self.catalog.get(path).json()
            if result:
                self._updateFileMetadata(result[0], True)
                self._urlEncodeMetadata(
                    asset_mapping.get("url_encoding_safe_overrides"))
            else:
                raise RuntimeError(
                    "Metadata query did not return any results: %s" % path)

        self._getFileExtensionMetadata(self.metadata.get("file_ext"))

        for k, v in asset_mapping.get("column_value_templates", {}).items():
            try:
                self.metadata[k] = v.format(**self.metadata)
            except KeyError as e:
                logging.warning(
                    "Column value template substitution error: %s" %
                    format_exception(e))
                continue
        self._urlEncodeMetadata(
            asset_mapping.get("url_encoding_safe_overrides"))

    def _getFileExtensionMetadata(self, ext):
        ext_map = self.config.get("file_ext_mappings", {})
        entry = ext_map.get(ext)
        if entry:
            self._updateFileMetadata(entry)

    def _getFileHatracMetadata(self, asset_mapping):
        try:
            hatrac_templates = asset_mapping["hatrac_templates"]
            # URI is required
            self.metadata["URI"] = hatrac_templates["hatrac_uri"].format(
                **self.metadata)
            # overridden content-disposition is optional
            content_disposition = hatrac_templates.get("content-disposition")
            self.metadata["content-disposition"] = \
                None if not content_disposition else content_disposition.format(**self.metadata)
            self._urlEncodeMetadata(
                asset_mapping.get("url_encoding_safe_overrides"))
        except KeyError as e:
            raise ConfigurationError("Hatrac template substitution error: %s" %
                                     format_exception(e))

    def _hatracUpload(self,
                      uri,
                      file_path,
                      md5=None,
                      sha256=None,
                      content_type=None,
                      content_disposition=None,
                      chunked=True,
                      create_parents=True,
                      allow_versioning=True,
                      callback=None):

        # check if there is already an in-progress transfer for this file,
        # and if so, that the local file has not been modified since the original upload job was created
        can_resume = False
        transfer_state = self.getTransferState(file_path)
        if transfer_state:
            content_md5 = transfer_state.get("content-md5")
            content_sha256 = transfer_state.get("content-sha256")
            if content_md5 or content_sha256:
                if (md5 == content_md5) or (sha256 == content_sha256):
                    can_resume = True

        if transfer_state and can_resume:
            logging.info(
                "Resuming upload (%s) of file: [%s] to host %s. Please wait..."
                % (self.getTransferStateStatus(file_path), file_path,
                   transfer_state.get("host")))
            path = transfer_state["target"]
            job_id = transfer_state['url'].rsplit("/", 1)[1]
            if not (transfer_state["total"] == transfer_state["completed"]):
                self.store.put_obj_chunked(
                    path,
                    file_path,
                    job_id,
                    callback=callback,
                    start_chunk=transfer_state["completed"])
            return self.store.finalize_upload_job(path, job_id)
        else:
            logging.info("Uploading file: [%s] to host %s. Please wait..." %
                         (self.getFileDisplayName(file_path), self.server_url))
            return self.store.put_loc(uri,
                                      file_path,
                                      md5=md5,
                                      sha256=sha256,
                                      content_type=content_type,
                                      content_disposition=content_disposition,
                                      chunked=chunked,
                                      create_parents=create_parents,
                                      allow_versioning=allow_versioning,
                                      callback=callback)

    def _catalogRecordCreate(self, catalog_table, row, default_columns=None):
        """

        :param catalog_table:
        :param row:
        :param default_columns:
        :return:
        """
        if self.cancelled:
            return None

        try:
            missing = self.catalog.validateRowColumns(row, catalog_table)
            if missing:
                raise CatalogCreateError(
                    "Unable to update catalog entry because one or more specified columns do not exist in the "
                    "target table: [%s]" % ','.join(missing))
            if not default_columns:
                default_columns = self.catalog.getDefaultColumns(
                    row, catalog_table)
            default_param = (
                '?defaults=%s' %
                ','.join(default_columns)) if len(default_columns) > 0 else ''
            # for default in default_columns:
            #    row[default] = None
            create_uri = '/entity/%s%s' % (catalog_table, default_param)
            logging.debug(
                "Attempting catalog record create [%s] with data: %s" %
                (create_uri, json.dumps(row)))
            return self.catalog.post(create_uri, json=[row]).json()
        except:
            etype, value, tb = sys.exc_info()
            raise CatalogCreateError(format_exception(value))

    def _catalogRecordUpdate(self, catalog_table, old_row, new_row):
        """

        :param catalog_table:
        :param new_row:
        :param old_row:
        :return:
        """
        if self.cancelled:
            return None

        try:
            keys = sorted(list(new_row.keys()))
            old_keys = sorted(list(old_row.keys()))
            if keys != old_keys:
                raise RuntimeError(
                    "Cannot update catalog - "
                    "new row column list and old row column list do not match: New: %s != Old: %s"
                    % (keys, old_keys))
            combined_row = {
                'o%d' % i: old_row[keys[i]]
                for i in range(len(keys))
            }
            combined_row.update(
                {'n%d' % i: new_row[keys[i]]
                 for i in range(len(keys))})
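            # For keys ['A', 'B'] the URI built below is
            #   /attributegroup/<table>/o0:=A,o1:=B;n0:=A,n1:=B
            # where the o* aliases carry the current values that select the
            # row and the n* aliases carry the replacement values.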
            update_uri = '/attributegroup/%s/%s;%s' % (catalog_table, ','.join(
                ["o%d:=%s" % (i, urlquote(keys[i]))
                 for i in range(len(keys))]), ','.join([
                     "n%d:=%s" % (i, urlquote(keys[i]))
                     for i in range(len(keys))
                 ]))
            logging.debug(
                "Attempting catalog record update [%s] with data: %s" %
                (update_uri, json.dumps(combined_row)))
            return self.catalog.put(update_uri, json=[combined_row]).json()
        except:
            etype, value, tb = sys.exc_info()
            raise CatalogUpdateError(format_exception(value))

    def defaultFileCallback(self, **kwargs):
        completed = kwargs.get("completed")
        total = kwargs.get("total")
        file_path = kwargs.get("file_path")
        file_name = os.path.basename(file_path) if file_path else ""
        job_info = kwargs.get("job_info", {})
        job_info.update()
        if completed and total:
            file_name = " [%s]" % file_name
            job_info.update({
                "completed": completed,
                "total": total,
                "host": kwargs.get("host")
            })
            status = "Uploading file%s: %d%% complete" % (
                file_name,
                round(((float(completed) / float(total)) % 100) * 100))
            self.setTransferState(file_path, job_info)
        else:
            summary = kwargs.get("summary", "")
            file_name = "Uploaded file: [%s] " % file_name
            status = file_name  # + summary
        if status:
            # logging.debug(status)
            pass
        if self.cancelled:
            return -1

        return True

    def loadTransferState(self):
        transfer_state_file_path = self.getDeployedTransferStateFilePath()
        transfer_state_dir = os.path.dirname(transfer_state_file_path)
        try:
            if not os.path.isdir(transfer_state_dir):
                try:
                    os.makedirs(transfer_state_dir)
                except OSError as error:
                    if error.errno != errno.EEXIST:
                        raise

            if not os.path.isfile(transfer_state_file_path):
                with open(transfer_state_file_path, "w") as tsfp:
                    json.dump(self.transfer_state, tsfp)

            self.transfer_state_fp = \
                open(transfer_state_file_path, 'r+')
            self.transfer_state = json.load(self.transfer_state_fp,
                                            object_pairs_hook=OrderedDict)
        except Exception as e:
            logging.warning(
                "Unable to read transfer state file, transfer checkpointing will not be available. "
                "Error: %s" % format_exception(e))

    def getTransferState(self, file_path):
        return self.transfer_state.get(file_path)

    def setTransferState(self, file_path, transfer_state):
        self.transfer_state[file_path] = transfer_state
        self.writeTransferState()

    def delTransferState(self, file_path):
        transfer_state = self.getTransferState(file_path)
        if transfer_state:
            del self.transfer_state[file_path]
        self.writeTransferState()

    def writeTransferState(self):
        if not self.transfer_state_fp:
            return
        try:
            self.transfer_state_fp.seek(0, 0)
            self.transfer_state_fp.truncate()
            json.dump(self.transfer_state, self.transfer_state_fp, indent=2)
            self.transfer_state_fp.flush()
        except Exception as e:
            logging.warning("Unable to write transfer state file: %s" %
                            format_exception(e))

    def cleanupTransferState(self):
        if self.transfer_state_fp and not self.transfer_state_fp.closed:
            try:
                self.transfer_state_fp.flush()
                self.transfer_state_fp.close()
            except Exception as e:
                logging.warning(
                    "Unable to flush/close transfer state file: %s" %
                    format_exception(e))

    def getTransferStateStatus(self, file_path):
        transfer_state = self.getTransferState(file_path)
        if transfer_state:
            return "%d%% complete" % (round(
                ((float(transfer_state["completed"]) /
                  float(transfer_state["total"])) % 100) * 100))
        return None
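
The three classmethods that raise NotImplementedError above are the full subclass contract. A minimal hypothetical subclass follows; the server entry, version, and config path are invented for illustration, but the keys mirror the ones initialize() and getDefaultServer() read.

class ExampleUpload(DerivaUpload):
    """Hypothetical concrete uploader for a single known server."""

    @classmethod
    def getServers(cls):
        return [{"host": "example.org",
                 "protocol": "https",
                 "catalog_id": "1",
                 "default": True}]

    @classmethod
    def getVersion(cls):
        return "0.1.0"

    @classmethod
    def getConfigPath(cls):
        # Expanded by getDeployedConfigPath() via os.path.expanduser().
        return "~/.deriva/example-upload"

# Typical driver sequence, assuming valid credentials are already deployed:
#   uploader = ExampleUpload()
#   uploader.scanDirectory("/data/to/upload")
#   uploader.uploadFiles()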
Example #25
class DerivaHatracCLI(BaseCLI):
    """Deriva Hatrac Command-line Interface.
    """
    def __init__(self, description, epilog):
        """Initializes the CLI.
        """
        super(DerivaHatracCLI, self).__init__(description, epilog, VERSION)

        # initialized after argument parsing
        self.args = None
        self.host = None
        self.resource = None
        self.store = None

        # parent arg parser
        self.remove_options(['--config-file', '--credential-file'])
        subparsers = self.parser.add_subparsers(title='sub-commands', dest='subcmd')

        # list parser
        ls_parser = subparsers.add_parser('ls', help="list the elements of a namespace")
        ls_parser.add_argument("resource", metavar="<path>", type=str, help="namespace path")
        ls_parser.set_defaults(func=self.list)

        # mkdir parser
        mkdir_parser = subparsers.add_parser('mkdir', help="create a namespace")
        mkdir_parser.add_argument("-p", "--parents", action="store_true",
                                  help="Create intermediate parent namespaces as required")
        mkdir_parser.add_argument("resource", metavar="<path>", type=str, help="namespace path")
        mkdir_parser.set_defaults(func=self.mkdir)

        # rmdir parser
        rmdir_parser = subparsers.add_parser('rmdir', help="remove a namespace")
        rmdir_parser.add_argument("resource", metavar="<path>", type=str, help="namespace path")
        rmdir_parser.set_defaults(func=self.rmdir)

        # getacl parser
        getacl_parser = subparsers.add_parser('getacl', help="get ACL")
        getacl_parser.add_argument("resource", metavar="<path>", type=str, help="object or namespace path")
        getacl_parser.add_argument("--access", default=None, metavar="<access-mode>",
                                   help="Optionally specify 'access' mode.")
        getacl_parser.add_argument("--role", default=None, metavar="<role>",
                                   help="Optionally specify 'role'. Must specify 'access' with this option.")
        getacl_parser.set_defaults(func=self.getacl)

        # setacl parser
        setacl_parser = subparsers.add_parser('setacl', help="set ACL")
        setacl_parser.add_argument("resource", metavar="<path>", type=str, help="object or namespace path")
        setacl_parser.add_argument("access", metavar="<access-mode>", help="access mode")
        setacl_parser.add_argument("roles", nargs='+', metavar="<role>", help="role")
        setacl_parser.add_argument("--add", action="store_true", help="add a single role to the ACL")
        setacl_parser.set_defaults(func=self.setacl)

        # delacl parser
        delacl_parser = subparsers.add_parser('delacl', help="delete ACL")
        delacl_parser.add_argument("resource", metavar="<path>", type=str, help="object or namespace path")
        delacl_parser.add_argument("access", metavar="<access-mode>", help="access mode")
        delacl_parser.add_argument("role", nargs='?', metavar="<role>", help="role")
        delacl_parser.set_defaults(func=self.delacl)

        # getobj parser
        getobj_parser = subparsers.add_parser('get', help="get object")
        getobj_parser.add_argument("resource", metavar="<path>", type=str, help="object path")
        getobj_parser.add_argument('outfile', metavar="<outfile>", nargs='?', type=str, help="output filename or -")
        getobj_parser.set_defaults(func=self.getobj)

        # putobj parser
        putobj_parser = subparsers.add_parser('put', help="put object")
        putobj_parser.add_argument('infile', metavar="<infile>", type=str, help="input filename")
        putobj_parser.add_argument("resource", metavar="<path>", type=str, help="object path")
        putobj_parser.add_argument("--content-type", metavar="<type>", type=str, help="HTTP Content-Type header value")
        putobj_parser.add_argument("--parents", action="store_true",
                                   help="Create intermediate parent namespaces as required")
        putobj_parser.set_defaults(func=self.putobj)

        # delobj parser
        delobj_parser = subparsers.add_parser('del', help="delete object")
        delobj_parser.add_argument("resource", metavar="<path>", type=str, help="object path")
        delobj_parser.set_defaults(func=self.delobj)

    @staticmethod
    def _get_credential(host_name, token=None):
        if token:
            return {"cookie": "webauthn={t}".format(t=token)}
        else:
            return get_credential(host_name)

    def _post_parser_init(self, args):
        """Shared initialization for all sub-commands.
        """
        self.host = args.host if args.host else 'localhost'
        self.resource = urlquote(args.resource, '/')
        self.store = HatracStore('https', self.host, DerivaHatracCLI._get_credential(self.host, args.token))

    def list(self, args):
        """Implements the list sub-command.
        """
        try:
            namespaces = self.store.retrieve_namespace(self.resource)
            for name in namespaces:
                print(name)
        except HTTPError as e:
            if e.response.status_code == requests.codes.not_found:
                raise ResourceException('No such object or namespace', e)
            elif e.response.status_code != requests.codes.conflict:
                # 'conflict' just means the namespace has no contents - ok
                raise e
        except ValueError as e:
            raise ResourceException('Not a namespace', e)

    def mkdir(self, args):
        """Implements the mkdir sub-command.
        """
        try:
            self.store.create_namespace(self.resource, parents=args.parents)
        except HTTPError as e:
            if e.response.status_code == requests.codes.not_found:
                raise ResourceException("Parent namespace not found (use '--parents' to create parent namespace)", e)
            elif e.response.status_code == requests.codes.conflict:
                raise ResourceException("Namespace exists or the parent path is not a namespace", e)
            else:
                raise e

    def rmdir(self, args):
        """Implements the mkdir sub-command.
        """
        try:
            self.store.delete_namespace(self.resource)
        except HTTPError as e:
            if e.response.status_code == requests.codes.not_found:
                raise ResourceException('No such object or namespace', e)
            elif e.response.status_code == requests.codes.conflict:
                raise ResourceException("Namespace not empty", e)
            else:
                raise e

    def getacl(self, args):
        """Implements the getacl sub-command.
        """
        if args.role and not args.access:
            raise UsageException("Must use '--access' option with '--role' option")

        try:
            acls = self.store.get_acl(self.resource, args.access, args.role)
            for access in acls:
                print("%s:" % access)
                for role in acls.get(access, []):
                    print("  %s" % role)
        except HTTPError as e:
            if e.response.status_code == requests.codes.not_found:
                raise ResourceException('No such object or namespace or ACL entry', e)
            elif e.response.status_code == requests.codes.bad_request:
                raise ResourceException('Invalid ACL name %s' % args.access, e)
            else:
                raise e

    def setacl(self, args):
        """Implements the setacl sub-command.
        """
        if args.add and len(args.roles) > 1:
            raise UsageException("Option '--add' is only valid for a single role")

        try:
            self.store.set_acl(self.resource, args.access, args.roles, args.add)
        except HTTPError as e:
            if e.response.status_code == requests.codes.not_found:
                raise ResourceException('No such object or namespace', e)
            elif e.response.status_code == requests.codes.bad_request:
                raise ResourceException('Resource cannot be updated as requested', e)
            else:
                raise e

    def delacl(self, args):
        """Implements the getacl sub-command.
        """
        try:
            self.store.del_acl(self.resource, args.access, args.role)
        except HTTPError as e:
            if e.response.status_code == requests.codes.not_found:
                raise ResourceException('No such object or namespace or ACL entry', e)
            elif e.response.status_code == requests.codes.bad_request:
                raise ResourceException('Resource cannot be updated as requested', e)
            else:
                raise e

    def getobj(self, args):
        """Implements the getobj sub-command.
        """
        try:
            if args.outfile and args.outfile == '-':
                r = self.store.get_obj(self.resource)
                logging.debug('Content encoding: %s' % r.apparent_encoding)
                assert r.content, 'content cannot be read as bytes'  # never expected from the requests API
                os.write(sys.stdout.fileno(), r.content)
            else:
                outfilename = args.outfile if args.outfile else basename(self.resource)
                self.store.get_obj(self.resource, destfilename=outfilename)
        except HTTPError as e:
            if e.response.status_code == requests.codes.not_found:
                raise ResourceException('No such object', e)
            else:
                raise e

    def putobj(self, args):
        """Implements the putobj sub-command.
        """
        try:
            content_type = args.content_type if args.content_type else mu.guess_content_type(args.infile)
            loc = self.store.put_obj(
                self.resource, args.infile, headers={"Content-Type": content_type}, parents=args.parents)
            print(loc)
        except HTTPError as e:
            if e.response.status_code == requests.codes.not_found:
                raise ResourceException("Parent namespace not found (use '--parents' to create parent namespace)", e)
            elif e.response.status_code == requests.codes.conflict:
                raise ResourceException(
                    'Cannot create object (parent path is not a namespace or object name is in use)', e)
            else:
                raise e

    def delobj(self, args):
        """Implements the delobj sub-command.
        """
        try:
            self.store.del_obj(self.resource)
        except HTTPError as e:
            if e.response.status_code == requests.codes.not_found:
                raise ResourceException('No such object', e)
            else:
                raise e

    def main(self):
        """Main routine of the CLI.
        """
        args = self.parse_cli()

        def _resource_error_message(emsg):
            return "{prog} {subcmd}: {resource}: {msg}".format(
                prog=self.parser.prog, subcmd=args.subcmd, resource=args.resource, msg=emsg)

        try:
            if not hasattr(args, 'func'):
                self.parser.print_usage()
                return 1

            self._post_parser_init(args)
            args.func(args)
            return 0
        except UsageException as e:
            eprint("{prog} {subcmd}: {msg}".format(prog=self.parser.prog, subcmd=args.subcmd, msg=e))
        except ConnectionError as e:
            eprint("{prog}: Connection error occurred".format(prog=self.parser.prog))
        except DerivaPathError as e:
            eprint(e)
        except HTTPError as e:
            if e.response.status_code == requests.codes.unauthorized:
                msg = 'Authentication required'
            elif e.response.status_code == requests.codes.forbidden:
                msg = 'Permission denied'
            else:
                msg = e
            logging.debug(format_exception(e))
            eprint(_resource_error_message(msg))
        except ResourceException as e:
            logging.debug(format_exception(e.cause))
            eprint(_resource_error_message(e))
        except HatracHashMismatch as e:
            logging.debug(format_exception(e))
            eprint(_resource_error_message('Checksum verification failed'))
        except RuntimeError as e:
            logging.debug(format_exception(e))
            eprint('Unexpected runtime error occurred')
        except Exception:
            eprint('Unexpected error occurred')
            traceback.print_exc()
        return 1
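A minimal sketch of wiring the class above into an executable entry point follows; the description and epilog strings are placeholders, and the script name in the comments is made up (the --host and --token options referenced in _post_parser_init come from the BaseCLI parent parser).

# Hypothetical entry point for DerivaHatracCLI; the constructor strings
# are placeholders rather than the package's actual text.
import sys

def main():
    cli = DerivaHatracCLI("Hatrac command-line client", "Example epilog text")
    return cli.main()

if __name__ == '__main__':
    sys.exit(main())

# Example invocations (hostname and paths are made up):
#   hatrac-cli --host example.org ls /hatrac
#   hatrac-cli --host example.org put ./data.csv /hatrac/Data/data.csv --parents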
Example #27
    def __init__(self, server, **kwargs):
        self.server = server
        self.hostname = None
        self.catalog = None
        self.store = None
        self.cancelled = False
        self.output_dir = os.path.abspath(kwargs.get("output_dir", "."))
        self.envars = kwargs.get("envars", dict())
        self.config = kwargs.get("config")
        self.credentials = kwargs.get("credentials", dict())
        config_file = kwargs.get("config_file")
        credential_file = kwargs.get("credential_file")
        self.metadata = dict()
        self.sessions = dict()

        info = "%s v%s [Python %s, %s]" % (
            self.__class__.__name__, get_installed_version(VERSION),
            platform.python_version(), platform.platform(aliased=True))
        logging.info("Initializing downloader: %s" % info)

        if not self.server:
            raise DerivaDownloadConfigurationError("Server not specified!")

        # server variable initialization
        self.hostname = self.server.get('host', '')
        if not self.hostname:
            raise DerivaDownloadConfigurationError("Host not specified!")
        protocol = self.server.get('protocol', 'https')
        self.server_url = protocol + "://" + self.hostname
        catalog_id = self.server.get("catalog_id", "1")
        session_config = self.server.get('session')

        # credential initialization
        token = kwargs.get("token")
        oauth2_token = kwargs.get("oauth2_token")
        username = kwargs.get("username")
        password = kwargs.get("password")
        if credential_file:
            self.credentials = get_credential(self.hostname, credential_file)
        elif token or oauth2_token or (username and password):
            self.credentials = format_credential(token=token,
                                                 oauth2_token=oauth2_token,
                                                 username=username,
                                                 password=password)

        # catalog and file store initialization (both attributes are still
        # None at this point, so they can be assigned directly)
        self.catalog = ErmrestCatalog(protocol,
                                      self.hostname,
                                      catalog_id,
                                      self.credentials,
                                      session_config=session_config)
        self.store = HatracStore(protocol,
                                 self.hostname,
                                 self.credentials,
                                 session_config=session_config)

        # init dcctx cid to a default
        self.set_dcctx_cid(self.__class__.__name__)

        # process config file
        if config_file:
            try:
                self.config = read_config(config_file)
            except Exception as e:
                raise DerivaDownloadConfigurationError(e)
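A minimal sketch of constructing this downloader; GenericDownloader is a stand-in name (the snippet shows only the __init__ method), and the host, token, and paths are made-up values.

# Hypothetical construction of the downloader above; GenericDownloader is
# a stand-in class name, and all argument values are illustrative.
server = {"protocol": "https", "host": "example.org", "catalog_id": "1"}
downloader = GenericDownloader(server,
                               output_dir="./downloads",
                               token="my-webauthn-token",
                               config_file="./download-config.json")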
Example #28
def compute_pairs(studylist, radii, ratio=None, maxratio=None):
    print('Finding pairs for {0} studies'.format(len(studylist)))

    credential = get_credential(synapseserver)
    ermrest_catalog = ErmrestCatalog('https', synapseserver, 1, credential)
    hatrac_store = HatracStore('https', synapseserver, credentials=credential)

    pairlist = []
    for s in studylist:
        syn_study_id = s['Study']
        s['Paired'] = True

        print('Processing study {0}'.format(syn_study_id))
        study = SynapticPairStudy.from_study_id(ermrest_catalog, syn_study_id)
        try:
            study.retrieve_data(hatrac_store)
        except DerivaPathError:
            print('Study {0} missing synaptic pair'.format(syn_study_id))
            continue
        pairlist.append(s)

        # Compute the actual pairs for the given distances
        s1_to_s2, s2_to_s1 = study.syn_pairing_maps(radii, ratio, maxratio)

        # get results for different radii and store them in a dictionary of pandas DataFrames
        for i, r in enumerate(radii):

            unpaired1 = study.get_unpaired(s1_to_s2[i, :], study.s1)
            unpaired2 = study.get_unpaired(s2_to_s1[i, :], study.s2)
            paired1, paired2 = study.get_pairs(s1_to_s2[i, :], study.s1, study.s2)

            p = pd.DataFrame(unpaired1[:, 0:5], columns=['z', 'y', 'x', 'core', 'hollow'])
            s['UnpairedBefore'] = s.get('UnpairedBefore', dict())
            s['UnpairedBefore'][r] = {'Data': p}

            p = pd.DataFrame(unpaired2[:, 0:5], columns=['z', 'y', 'x', 'core', 'hollow'])
            s['UnpairedAfter'] = s.get('UnpairedAfter', dict())
            s['UnpairedAfter'][r] = {'Data': p}

            p = pd.DataFrame(paired1[:, 0:5], columns=['z', 'y', 'x', 'core', 'hollow'])
            s['PairedBefore'] = s.get('PairedBefore', dict())
            s['PairedBefore'][r] = {'Data': p}

            p = pd.DataFrame(paired2[:, 0:5], columns=['z', 'y', 'x', 'core', 'hollow'])
            s['PairedAfter'] = s.get('PairedAfter', dict())
            s['PairedAfter'][r] = {'Data': p}

            # Fill in other useful values so you can use them without having the study handy
            for ptype in ['PairedBefore', 'PairedAfter', 'UnpairedBefore', 'UnpairedAfter']:
                p = s[ptype][r]
                p['DataType'] = ptype
                p['Study'] = s['Study']
                p['Radius'] = r
                p['Type'] = s['Type']

            # now compute the centroids and store them as pandas DataFrames.
            for ptype in ['PairedBefore', 'PairedAfter', 'UnpairedBefore', 'UnpairedAfter']:
                p = s[ptype][r]['Data']
                centroid = tuple([p['x'].mean(), p['y'].mean(), p['z'].mean()])
                pc = pd.DataFrame.from_records([centroid], columns=['x', 'y', 'z'])
                cname = ptype + 'Centroid'
                s[cname] = s.get(cname, dict())
                s[cname][r] = {'Data': pc}
                p = s[cname][r]
                p['DataType'] = cname
                p['Study'] = s['Study']
                p['Radius'] = r
                p['Type'] = s['Type']

            # Now compute the aligned point sets, if the transformation matrix is available.
            if s['Aligned']:
                image_obj = s['Alignment']
                s['AlignmentPts'] = s.get('AlignmentPts', dict())
                s['AlignmentPts'][r] = {'Data': s['StudyAlignmentPts']}
                for ptype in ['PairedBefore', 'PairedAfter', 'UnpairedBefore', 'UnpairedAfter']:
                    p = pd.DataFrame(transform_points(image_obj.M_canonical, s[ptype][r]['Data'].loc[:, ['x', 'y', 'z']]),
                                     columns=['x', 'y', 'z'])
                    p['core'] = s[ptype][r]['Data']['core']

                    # Now store the aligned results under an 'Aligned'-prefixed key.
                    datatype = 'Aligned' + ptype
                    s[datatype] = s.get(datatype, dict())
                    s[datatype][r] = {'Data': p}
                    s[datatype][r]['DataType'] = datatype
                    s[datatype][r]['Study'] = s['Study']
                    s[datatype][r]['Radius'] = r
                    s[datatype][r]['Type'] = s['Type']

                    # now compute the aligned centroids and store them as pandas DataFrames.
                    centroid = tuple([p['x'].mean(), p['y'].mean(), p['z'].mean()])
                    pc = pd.DataFrame.from_records([centroid], columns=['x', 'y', 'z'])
                    cname = datatype + 'Centroid'
                    s[cname] = s.get(cname, dict())
                    s[cname][r] = {'Data': pc}
                    s[cname][r]['DataType'] = cname
                    s[cname][r]['Study'] = s['Study']
                    s[cname][r]['Radius'] = r
                    s[cname][r]['Type'] = s['Type']
    return pairlist
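A minimal sketch of calling compute_pairs; the study records and radii below are made-up values, but the keys ('Study', 'Type', 'Aligned') match the fields the function reads.

# Hypothetical invocation of compute_pairs; the RIDs, types, and radii are
# made-up, and 'Aligned' is False so no transformation matrix is needed.
studylist = [
    {'Study': '1-ABCD', 'Type': 'learner', 'Aligned': False},
    {'Study': '1-EFGH', 'Type': 'control', 'Aligned': False},
]
radii = [2.0, 4.0, 8.0]
paired = compute_pairs(studylist, radii, maxratio=2.0)
for s in paired:
    # each study now carries per-radius DataFrames, e.g. the unpaired points
    print(s['Study'], s['UnpairedBefore'][radii[0]]['Data'].shape)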