Esempio n. 1
0
 def __init__(self, **kwargs):
     """Set up the client from keyword options.

     Splits the ``baseuri`` option into scheme, host, optional port and
     path, copies the tool and mail options from ``kwargs`` onto the
     instance (options that are absent become ``None``), and creates the
     Hatrac store and ERMrest catalog using the ``cookie`` option as
     credentials.
     """
     self.metadata = kwargs.get("metadata")
     self.baseuri = kwargs.get("baseuri")

     # Index 0 of the parse result is the scheme, index 1 the "host[:port]".
     parsed = urlparse.urlparse(self.baseuri)
     self.scheme = parsed[0]
     netloc_parts = parsed[1].split(":")
     self.host = netloc_parts[0]
     self.path = parsed.path
     self.port = netloc_parts[1] if len(netloc_parts) > 1 else None

     for option in ("dzi", "thumbnails", "czi2dzi", "viewer", "czirules",
                    "showinf", "data_scratch", "cookie"):
         setattr(self, option, kwargs.get(option))

     self.store = HatracStore(
         self.scheme,
         self.host,
         {'cookie': self.cookie},
     )
     # The catalog is opened with the last segment of the base URI path.
     last_segment = self.path.split('/')[-1]
     self.catalog = PollingErmrestCatalog(
         self.scheme,
         self.host,
         last_segment,
         {'cookie': self.cookie},
     )

     for option in ("mail_server", "mail_sender", "mail_receiver", "logger"):
         setattr(self, option, kwargs.get(option))
     self.logger.debug('Client initialized.')
Esempio n. 2
0
 def __init__(self, **kwargs):
     """Initialise the client from keyword options.

     The ``baseuri`` option is parsed into ``scheme``, ``host``, ``port``
     (``None`` when the URI names no port) and ``path``. Every other
     recognised option is stored under its own name, defaulting to
     ``None``. The Hatrac store and the ERMrest catalog are both created
     with the ``cookie`` option as their credentials.
     """
     option = kwargs.get

     self.metadata = option("metadata")
     self.baseuri = option("baseuri")

     uri = urlparse.urlparse(self.baseuri)
     self.scheme = uri[0]
     host_and_port = uri[1].split(":")
     self.host = host_and_port[0]
     self.path = uri.path
     # A second ":"-separated piece, when present, is kept as the port string.
     self.port = host_and_port[1] if len(host_and_port) > 1 else None

     self.dzi = option("dzi")
     self.thumbnails = option("thumbnails")
     self.czi2dzi = option("czi2dzi")
     self.viewer = option("viewer")
     self.czirules = option("czirules")
     self.showinf = option("showinf")
     self.data_scratch = option("data_scratch")
     self.cookie = option("cookie")

     self.store = HatracStore(self.scheme, self.host,
                              {'cookie': self.cookie})
     # The final path segment of the base URI selects the catalog.
     self.catalog = PollingErmrestCatalog(self.scheme, self.host,
                                          self.path.split('/')[-1],
                                          {'cookie': self.cookie})

     self.mail_server = option("mail_server")
     self.mail_sender = option("mail_sender")
     self.mail_receiver = option("mail_receiver")
     self.logger = option("logger")
     self.logger.debug('Client initialized.')
Esempio n. 3
0
 def __init__(self, **kwargs):
     """Initialise the Hatrac client from keyword options.

     Parses the ``baseuri`` option into scheme, host, optional port and
     path, creates the Hatrac store and the ERMrest catalog with the
     ``cookie`` option as credentials, and stores the mail settings and
     the logger. Options that are not supplied are stored as ``None``.
     """
     self.baseuri = kwargs.get("baseuri")

     # Index 0 of the parse result is the scheme, index 1 the "host[:port]".
     parsed = urlparse.urlparse(self.baseuri)
     self.scheme = parsed[0]
     netloc_parts = parsed[1].split(":")
     self.host = netloc_parts[0]
     self.path = parsed.path
     self.port = netloc_parts[1] if len(netloc_parts) > 1 else None

     self.cookie = kwargs.get("cookie")
     self.store = HatracStore(self.scheme, self.host,
                              {'cookie': self.cookie})
     # The catalog is opened with the last segment of the base URI path.
     self.catalog = PollingErmrestCatalog(self.scheme, self.host,
                                          self.path.split('/')[-1],
                                          {'cookie': self.cookie})

     for option in ("mail_server", "mail_sender", "mail_receiver", "logger"):
         setattr(self, option, kwargs.get(option))
     self.logger.debug('Hatrac Client initialized.')
Esempio n. 4
0
 def __init__(self, **kwargs):
     """Initialise the YouTube delete client from keyword options.

     Parses the ``baseuri`` option into scheme, host, optional port and
     path, stores the cookie and the two OAuth file options, creates the
     ERMrest catalog with the cookie as credentials, and stores the mail
     settings and the logger. Missing options are stored as ``None``.
     """
     option = kwargs.get

     self.baseuri = option("baseuri")
     uri = urlparse.urlparse(self.baseuri)
     self.scheme = uri[0]
     host_and_port = uri[1].split(":")
     self.host = host_and_port[0]
     self.path = uri.path
     # A second ":"-separated piece, when present, is kept as the port string.
     self.port = host_and_port[1] if len(host_and_port) > 1 else None

     self.cookie = option("cookie")
     self.client_secrets_file = option("client_secrets_file")
     self.client_oauth2_file = option("client_oauth2_file")

     # The final path segment of the base URI selects the catalog.
     self.catalog = PollingErmrestCatalog(
         self.scheme,
         self.host,
         self.path.split('/')[-1],
         {'cookie': self.cookie},
     )

     self.mail_server = option("mail_server")
     self.mail_sender = option("mail_sender")
     self.mail_receiver = option("mail_receiver")
     self.logger = option("logger")
     self.logger.debug('Delete YouTube Client initialized.')
Esempio n. 5
0
 def __init__(self, **kwargs):
     """Initialise the YouTube upload client from keyword options.

     Parses the ``baseuri`` option into scheme, host, optional port and
     path, stores the remaining options under their own names (``None``
     when absent), creates the Hatrac store and the ERMrest catalog with
     the ``cookie`` option as credentials, and parses the command line
     through the module-level ``argparser``.
     """
     self.baseuri = kwargs.get("baseuri")

     # Index 0 of the parse result is the scheme, index 1 the "host[:port]".
     parsed = urlparse.urlparse(self.baseuri)
     self.scheme = parsed[0]
     netloc_parts = parsed[1].split(":")
     self.host = netloc_parts[0]
     self.path = parsed.path
     self.port = netloc_parts[1] if len(netloc_parts) > 1 else None

     for option in ("cookie", "client_secrets_file", "client_oauth2_file",
                    "data_scratch", "ffmpeg", "ffprobe", "category",
                    "keywords", "privacyStatus", "text_position"):
         setattr(self, option, kwargs.get(option))

     self.store = HatracStore(
         self.scheme,
         self.host,
         {'cookie': self.cookie},
     )
     # The catalog is opened with the last segment of the base URI path.
     self.catalog = PollingErmrestCatalog(
         self.scheme,
         self.host,
         self.path.split('/')[-1],
         {'cookie': self.cookie},
     )

     for option in ("mail_server", "mail_sender", "mail_receiver", "logger"):
         setattr(self, option, kwargs.get(option))

     # NOTE(review): "--config" is registered on the shared argparser each
     # time an instance is built; presumably only one instance is ever
     # created per process -- confirm.
     argparser.add_argument("--config",
                            required=True,
                            help="YouTube configuration file")
     parsed_args = argparser.parse_args()
     parsed_args.category = self.category
     parsed_args.keywords = self.keywords
     parsed_args.privacyStatus = self.privacyStatus
     parsed_args.noauth_local_webserver = True
     self.args = parsed_args
     self.logger.debug('Upload YouTube Client initialized.')
Esempio n. 6
0
class YouTubeClient(object):
    """
    Network client for YouTube.

    Queries the ``Common:Delete_Youtube`` table of an ERMrest catalog for
    pending rows, deletes the referenced videos from YouTube and writes the
    outcome back to the table. Failures are logged and, when the mail
    options are configured, also reported by email.
    """

    ## Derived from the ermrest iobox service client

    def __init__(self, **kwargs):
        """Initialise the client from keyword options.

        Recognised options: ``baseuri``, ``cookie``, ``client_secrets_file``,
        ``client_oauth2_file``, ``mail_server``, ``mail_sender``,
        ``mail_receiver`` and ``logger``. Options that are not supplied are
        stored as ``None``; ``logger`` is used immediately.
        """
        self.baseuri = kwargs.get("baseuri")
        o = urlparse.urlparse(self.baseuri)
        self.scheme = o[0]
        host_port = o[1].split(":")
        self.host = host_port[0]
        self.path = o.path
        self.port = None
        if len(host_port) > 1:
            self.port = host_port[1]
        self.cookie = kwargs.get("cookie")
        self.client_secrets_file = kwargs.get("client_secrets_file")
        self.client_oauth2_file = kwargs.get("client_oauth2_file")
        # Set by youtube_authenticated_service(); defined here so the
        # attribute always exists.
        self.youtube = None
        # The catalog is opened with the last segment of the base URI path.
        self.catalog = PollingErmrestCatalog(self.scheme, self.host,
                                             self.path.split('/')[-1],
                                             {'cookie': self.cookie})
        self.mail_server = kwargs.get("mail_server")
        self.mail_sender = kwargs.get("mail_sender")
        self.mail_receiver = kwargs.get("mail_receiver")
        self.logger = kwargs.get("logger")
        self.logger.debug('Delete YouTube Client initialized.')

    def _log_exception(self, message='got exception "%s"'):
        """Log the exception being handled and return its formatted traceback.

        Must be called from inside an ``except`` block. ``message`` is a
        ``%``-format string receiving the exception text.
        """
        et, ev, tb = sys.exc_info()
        trace = str(traceback.format_exception(et, ev, tb))
        self.logger.error(message % str(ev))
        self.logger.error('%s' % trace)
        return trace

    @staticmethod
    def _video_id(youtube_uri):
        """Return the text between the last '/' and the first '?' of the URI.

        A URI without a query string yields everything after the last '/'
        instead of raising ``ValueError``.
        """
        start = youtube_uri.rfind('/') + 1
        end = youtube_uri.find('?')
        if end < 0:
            end = len(youtube_uri)
        return youtube_uri[start:end]

    def sendMail(self, subject, text):
        """Send an email notification.

        Does nothing unless the mail server, sender and receiver are all
        configured. A temporary name-resolution failure (``EAI_AGAIN``) is
        retried after a 100 second sleep, giving up after more than 10
        retries; any other error is logged and the mail is dropped.
        """
        if not (self.mail_server and self.mail_sender and self.mail_receiver):
            return
        retry = 0
        ready = False
        while not ready:
            try:
                msg = MIMEText('%s\n\n%s' % (text, mail_footer), 'plain')
                msg['Subject'] = subject
                msg['From'] = self.mail_sender
                msg['To'] = self.mail_receiver
                server = smtplib.SMTP(self.mail_server)
                try:
                    server.sendmail(self.mail_sender,
                                    self.mail_receiver.split(','),
                                    msg.as_string())
                    server.quit()
                finally:
                    # Release the socket even when sending failed; close()
                    # is harmless after a successful quit().
                    server.close()
                self.logger.debug('Sent email notification.')
                ready = True
            except socket.gaierror as e:
                if e.errno == socket.EAI_AGAIN:
                    time.sleep(100)
                    retry = retry + 1
                    ready = retry > 10
                else:
                    ready = True
                if ready:
                    self._log_exception()
            except Exception:
                self._log_exception()
                ready = True

    def start(self):
        """Start the process for deleting files from YouTube.

        Any exception is logged, reported by email and then re-raised.
        """
        try:
            self.deleteFromYouTube()
        except BaseException:
            # Deliberately catches everything (the exception is re-raised)
            # so that an interrupted run is reported as well.
            trace = self._log_exception('got unexpected exception "%s"')
            self.sendMail(
                'FAILURE Delete YouTube: unexpected exception',
                '%s\nThe process might have been stopped\n' % trace)
            raise

    def youtube_authenticated_service(self):
        """Get the YouTube delete credentials and build ``self.youtube``.

        Stored credentials are read from ``client_oauth2_file``; when none
        are stored or they are invalid, the console flow is run.
        """
        flow = InstalledAppFlow.from_client_secrets_file(
            self.client_secrets_file, SCOPES)
        storage = Storage(self.client_oauth2_file)
        credentials = storage.get()
        if credentials is None or credentials.invalid:
            credentials = flow.run_console()
        self.youtube = build(API_SERVICE_NAME,
                             API_VERSION,
                             credentials=credentials)

    def youtube_delete(self, youtube_uri):
        """Delete a video from YouTube.

        Returns ``True`` when the delete request was executed and ``False``
        when authentication yielded no service or any error occurred
        (errors are logged, not raised).
        """
        try:
            video_id = self._video_id(youtube_uri)
            self.logger.debug('Deleting YouTube video id="%s".' % (video_id))
            self.youtube_authenticated_service()
            if self.youtube is None:
                self.logger.debug(
                    'Authentication for deleting a YouTube video failed.')
                return False
            self.logger.debug('Authenticated to the YouTube delete service.')
            response = self.youtube.videos().delete(id=video_id).execute()
            self.logger.debug('Deleted response %s.' % (response))
            return True
        except Exception:
            self._log_exception('got YouTube exception "%s"')
            return False

    def deleteFromYouTube(self):
        """Delete from YouTube every video listed as pending in the catalog.

        Rows of ``Common:Delete_Youtube`` that are not yet deleted and whose
        status is "in progress" or null are processed one by one; each row
        is updated with ``success`` or with the failure reason.
        """
        url = '/entity/Common:Delete_Youtube/Youtube_Deleted=FALSE/Processing_Status=in%20progress;Processing_Status::null::'
        resp = self.catalog.get(url)
        resp.raise_for_status()
        fileids = [(f['YouTube_URI'], f['RID']) for f in resp.json()]

        self.logger.debug('Deleting from YouTube %d videos(s).' %
                          (len(fileids)))
        for youtube_uri, rid in fileids:
            try:
                if self.youtube_delete(youtube_uri):
                    self.logger.debug(
                        'SUCCEEDED deleted from YouTube the video with the URL: "%s".'
                        % (youtube_uri))
                    columns = ["Youtube_Deleted", "Processing_Status"]
                    columns = ','.join([urlquote(col) for col in columns])
                    url = '/attributegroup/Common:Delete_Youtube/RID;%s' % (
                        columns)
                    obj = {
                        'RID': rid,
                        'Youtube_Deleted': True,
                        'Processing_Status': 'success'
                    }
                    self.catalog.put(url, json=[obj])
                    self.logger.debug(
                        'SUCCEEDED updated the Common:Delete_Youtube table entry for the YouTube URL: "%s".'
                        % (youtube_uri))
                else:
                    self.logger.debug(
                        'Failure in deleting from YouTube the video with the URL: "%s".'
                        % (youtube_uri))
                    self.sendMail(
                        'FAILURE Delete YouTube: YouTube Failure',
                        'The video "%s" could not be deleted from Youtube.' %
                        youtube_uri)
                    self.reportFailure(rid, 'YouTube Failure')
            except Exception as e:
                self._log_exception()
                self.reportFailure(rid, str(e))

    def reportFailure(self, rid, error_message):
        """Update the Delete_Youtube table with the failure result.

        ``error_message`` is stored as the row's ``Processing_Status``. A
        failure to update the table is logged and reported by email rather
        than raised.
        """
        try:
            columns = ["Processing_Status"]
            columns = ','.join([urlquote(col) for col in columns])
            url = '/attributegroup/Common:Delete_Youtube/RID;%s' % (columns)
            obj = {'RID': rid, 'Processing_Status': '%s' % error_message}
            self.catalog.put(url, json=[obj])
            self.logger.debug(
                'SUCCEEDED updated the Delete_Youtube table for the RID "%s"  with the Processing_Status result "%s".'
                % (rid, error_message))
        except Exception:
            trace = self._log_exception()
            self.sendMail('FAILURE Delete YouTube: reportFailure ERROR',
                          '%s\n' % trace)
Esempio n. 7
0
class Worker(object):
    """Claims jobs from an ERMrest catalog and processes them.

    Connection settings and the shared catalog/store objects live on the
    class; each instance represents one claimed row and owns a temporary
    working directory that is removed again by ``cleanup()``.
    """

    # server to talk to... defaults to our own FQDN
    servername = os.getenv('SYNSPY_SERVER', platform.uname()[1])

    # secret session cookie
    credentials = get_credential(servername,
                                 credential_file=os.getenv(
                                     'SYNSPY_CREDENTIALS',
                                     DEFAULT_CREDENTIAL_FILE))

    poll_seconds = int(os.getenv('SYNSPY_POLL_SECONDS', '600'))

    # optional directory prefix for the synspy command-line scripts
    scriptdir = os.getenv('SYNSPY_PATH')
    scriptdir = '%s/' % scriptdir if scriptdir else ''

    # remember where we started
    startup_working_dir = os.getcwd()

    tmpdir = os.getenv('TMPDIR', '/var/tmp')

    # track per-instance working dirs
    working_dirs = dict()

    # these are persistent/logical connections so we create once and reuse
    # they can retain state and manage an actual HTTP connection-pool
    catalog = PollingErmrestCatalog('https', servername, '1', credentials)

    store = HatracStore('https', servername, credentials)

    # for state-tracking across look_for_work() iterations
    idle_etag = None

    def __init__(self, row, unit):
        """Bind the handler to a claimed ``row`` of work unit ``unit``.

        Creates a fresh temporary directory under ``tmpdir`` and makes it
        the process working directory.
        """
        sys.stderr.write('Claimed job %s.\n' % row.get('RID'))

        self.row = row
        self.unit = unit
        self.subject_path = '/hatrac/Zf/Zf_%s' % row['Subject']

        self.working_dir = None
        # we want a temporary work space for our working files
        self.working_dir = tempfile.mkdtemp(dir=self.tmpdir)
        self.working_dirs[self.working_dir] = self.working_dir
        os.chdir(self.working_dir)
        sys.stderr.write('Using working directory %s.\n' % self.working_dir)

    @staticmethod
    def cleanup_working_dir(dirname):
        """Recursively delete the working directory ``dirname``."""
        sys.stderr.write('Purging working directory %s... ' % dirname)
        shutil.rmtree(dirname)
        sys.stderr.write('done.\n')

    def cleanup(self):
        """Return to the startup directory and purge this job's working dir."""
        sys.stderr.write('\n')
        os.chdir(self.startup_working_dir)
        if self.working_dir:
            self.cleanup_working_dir(self.working_dir)
            del self.working_dirs[self.working_dir]
            self.working_dir = None

    def get_file(self, url):
        """Download file from URL returning local file name"""
        # short-cut, read file directly out of local hatrac
        filename = '/var/www' + url
        if os.path.isfile(filename):
            return filename

        # but fall back to HTTPS for remote workers...
        # the local name is the URL basename without its ":VERSION" suffix
        m = re.match('^(?P<basename>[^:]+)(?P<v>[:][0-9A-Z]+)?$',
                     os.path.basename(url))
        filename = m.groupdict()['basename']
        self.store.get_obj(url, destfilename=filename)
        return filename

    def get_image_info(self, img_filename):
        """Extract image resolution and shape.

        Raises:
          WorkerBadDataError: the image cannot be loaded or has no
            ``micron_spacing`` attribute.
        """
        try:
            I, md = load_image(str(img_filename))
        except Exception as e:
            raise WorkerBadDataError(
                'Image %s could not be loaded... is it the wrong format? %s' %
                (img_filename, e))
        if not hasattr(I, 'micron_spacing'):
            raise WorkerBadDataError(
                'Image %s lacks expected micron_spacing attribute.' %
                img_filename)
        return I.micron_spacing, I.shape

    def preprocess_roi(self, img_filename, zyx_slice, omit_voxels=False):
        """Analyze ROI and upload resulting NPZ file, returning NPZ URL.

        Raises:
          WorkerRuntimeError: the analysis command exits with a non-zero
            status.
        """
        command = [self.scriptdir + 'synspy-analyze', img_filename]
        # NOTE: this dict is the complete environment of the child process.
        env = {
            'ZYX_SLICE':
            zyx_slice,
            'ZYX_IMAGE_GRID':
            '0.4,0.26,0.26',
            'SYNSPY_DETECT_NUCLEI':
            str(self.row['Segmentation Mode'].lower() == 'nucleic'),
            'DUMP_PREFIX':
            './ROI_%s' % self.row['RID'],
            'OMIT_VOXELS':
            str(omit_voxels).lower(),
        }
        sys.stderr.write('Using analysis environment %r\n' % (env, ))
        analysis = subprocess.Popen(command, stdin=fnull, env=env)
        code = analysis.wait()
        del analysis
        if code != 0:
            raise WorkerRuntimeError('Non-zero analysis exit status %s!' %
                                     code)

        return self.store.put_loc(
            '%s/ROI_%s.npz' % (self.subject_path, self.row['RID']),
            'ROI_%s.npz' % self.row['RID'],
            headers={'Content-Type': 'application/octet-stream'})

    def filter_synspy_csv(self, csv_url):
        """Process input CSV URL and upload filtered CSV, returning CSV URL.

        Only the "saved parameters" row and rows whose ``override`` value is
        7 are copied to the filtered file.
        """
        # this should really be dead code in practice... current launcher uploads filtered csv directly
        m = re.match('^(?P<basename>.+)[.]csv(?P<v>[:][0-9A-Z]+)?$',
                     os.path.basename(csv_url))
        base = m.groupdict()['basename']
        csv_filename = '%s.csv' % base

        # download the content to temp dir
        self.store.get_obj(csv_url, destfilename=csv_filename)

        filtered_filename = '%s_only.csv' % base

        # context managers close both files even when filtering fails
        with open(csv_filename, 'r') as csv_file, \
                open(filtered_filename, 'w', newline='') as filtered_file:
            reader = csv.DictReader(csv_file)
            writer = csv.writer(filtered_file)

            # the 'red' column is optional in the input
            has_red = 'red' in reader.fieldnames

            # write header
            writer.writerow(('Z', 'Y', 'X', 'raw core', 'raw hollow',
                             'DoG core', 'DoG hollow') +
                            (('red', ) if has_red else ()) +
                            ('override', ))

            # copy w/ filtering
            for row in reader:
                is_parameters = (row['Z'] == 'saved'
                                 and row['Y'] == 'parameters')
                is_selected = row['override'] and int(row['override']) == 7
                if is_parameters or is_selected:
                    writer.writerow(
                        (row['Z'], row['Y'], row['X'], row['raw core'],
                         row['raw hollow'], row['DoG core'],
                         row['DoG hollow']) +
                        ((row['red'], ) if has_red else ()) +
                        (row['override'], ))

        # upload the file that was just written
        return self.store.put_loc('%s/%s' %
                                  (self.subject_path, filtered_filename),
                                  filtered_filename,
                                  headers={'Content-Type': 'text/csv'})

    def compute_synspy_stats(self, csv_url, existing_row=None):
        """Process input CSV URL and return stats column value updates.

        Only statistics that are missing from ``existing_row`` or differ
        from its current value are returned.
        """
        if existing_row is None:
            existing_row = {}
        filename = self.get_file(csv_url)
        c, m, s, p = util.load_segment_info_from_csv(filename,
                                                     (0.4, 0.26, 0.26),
                                                     filter_status=(3, 7))
        if c.shape[0] > 0:
            stats = {
                'Core Min.': float(m[:, 0].min()),
                'Core Max.': float(m[:, 0].max()),
                'Core Sum': float(m[:, 0].sum()),
                '#Centroids': int(m.shape[0]),
                'Core Mean': float(m[:, 0].mean()),
            }
        else:
            # no centroids: only the count is meaningful
            stats = {
                'Core Min.': None,
                'Core Max.': None,
                'Core Sum': None,
                '#Centroids': 0,
                'Core Mean': None,
            }
        return {
            k: v
            for k, v in stats.items()
            if k not in existing_row or existing_row[k] != v
        }

    def register_nuclei(self,
                        n1_url,
                        n2_url,
                        zyx_scale=(0.4, 0.26, 0.26),
                        filter_status=(3, 7)):
        """Register nuclei files returning alignment matrix and processed and uploaded pointcloud URLs.

           Returns:
             M, n1_url, n2_url
        """
        n1_filename = self.get_file(n1_url)
        n2_filename = self.get_file(n2_url)
        nuc1cmsp = util.load_segment_info_from_csv(n1_filename,
                                                   zyx_scale,
                                                   filter_status=filter_status)
        nuc2cmsp = util.load_segment_info_from_csv(n2_filename,
                                                   zyx_scale,
                                                   filter_status=filter_status)
        M, angles = register.align_centroids(nuc1cmsp[0], nuc2cmsp[0])
        # replace the second file's centroids with their transformed version
        nuc2cmsp = (register.transform_centroids(M,
                                                 nuc2cmsp[0]), ) + nuc2cmsp[1:]
        n1_outfile = 'ImagePair_%s_n1_registered.csv' % self.row['RID']
        n2_outfile = 'ImagePair_%s_n2_registered.csv' % self.row['RID']
        register.dump_registered_file_pair((n1_outfile, n2_outfile),
                                           (nuc1cmsp, nuc2cmsp))
        n1_url = self.store.put_loc('%s/%s' % (self.subject_path, n1_outfile),
                                    n1_outfile,
                                    headers={'Content-Type': 'text/csv'})
        n2_url = self.store.put_loc('%s/%s' % (self.subject_path, n2_outfile),
                                    n2_outfile,
                                    headers={'Content-Type': 'text/csv'})
        return M, n1_url, n2_url

    def matrix_to_prejson(self, M):
        """Return the top-left 4x4 entries of ``M`` as nested lists of floats."""
        return [[float(M[i, j]) for j in range(4)] for i in range(4)]

    def register_synapses(self,
                          s1_url,
                          s2_url,
                          zyx_scale=(0.4, 0.26, 0.26),
                          filter_status=(3, 7)):
        """Register synaptic files using image pair alignment, returning URLs of processed and uploaded pointcloud URLs.

           Returns:
             s1_url, s2_url
        """
        s1_filename = self.get_file(s1_url)
        s2_filename = self.get_file(s2_url)
        syn1cmsp = util.load_segment_info_from_csv(s1_filename,
                                                   zyx_scale,
                                                   filter_status=filter_status)
        syn2cmsp = util.load_segment_info_from_csv(s2_filename,
                                                   zyx_scale,
                                                   filter_status=filter_status)
        # the alignment matrix comes from the claimed row
        M = np.array(self.row['Alignment'], dtype=np.float64)
        syn2cmsp = (register.transform_centroids(M,
                                                 syn2cmsp[0]), ) + syn2cmsp[1:]
        s1_outfile = 'SynapticPair_%s_s1_registered.csv' % self.row.get('RID')
        s2_outfile = 'SynapticPair_%s_s2_registered.csv' % self.row.get('RID')
        register.dump_registered_file_pair((s1_outfile, s2_outfile),
                                           (syn1cmsp, syn2cmsp))
        s1_url = self.store.put_loc('%s/%s' % (self.subject_path, s1_outfile),
                                    s1_outfile,
                                    headers={'Content-Type': 'text/csv'})
        s2_url = self.store.put_loc('%s/%s' % (self.subject_path, s2_outfile),
                                    s2_outfile,
                                    headers={'Content-Type': 'text/csv'})
        return s1_url, s2_url

    def put_row_update(self, update_row):
        """Write ``update_row`` to the catalog, updating every column except ID and RID."""
        target_columns = ','.join([
            urlquote(col, safe='')
            for col in list(update_row.keys()) if col not in ['ID', 'RID']
        ])
        self.catalog.put('%s;%s' %
                         (self.unit.put_update_baseurl, target_columns),
                         json=[update_row])
        sys.stderr.write('\nupdated in ERMrest: %s' %
                         json.dumps(update_row, indent=2))

    work_units = _work_units  # these are defined above w/ their funcs and URLs...

    @classmethod
    def look_for_work(cls):
        """Find, claim, and process work for each work unit.

        Do find/claim with HTTP opportunistic concurrency control and
        caching for efficient polling and quiescence.

        On error, set Status="failed: reason"

        Result:
         true: there might be more work to claim
         false: we failed to find any work
        """
        found_work = False

        for unit in cls.work_units:
            # this handled concurrent update for us to safely and efficiently claim a record
            unit.idle_etag, batch = cls.catalog.state_change_once(
                unit.get_claimable_url, unit.put_claim_url,
                unit.claim_input_data, unit.idle_etag)
            # batch may be empty if no work was found...
            for row, _claim in batch:
                found_work = True
                handler = None
                try:
                    handler = cls(row, unit)
                    unit.run_row_job(handler)
                except WorkerBadDataError as e:
                    sys.stderr.write("Aborting task %s on data error: %s\n" %
                                     (row["RID"], e))
                    # same guard as the generic handler below: a unit may
                    # have no failure reporter
                    if unit.failure_input_data is not None:
                        cls.catalog.put(
                            unit.put_claim_url,
                            json=[unit.failure_input_data(row, e)])
                    # continue with next task...?
                except Exception as e:
                    # TODO: eat some exceptions and return True to continue?
                    if unit.failure_input_data is not None:
                        cls.catalog.put(unit.put_claim_url,
                                        json=[unit.failure_input_data(row, e)])
                    raise
                finally:
                    if handler is not None:
                        handler.cleanup()

        return found_work

    @classmethod
    def blocking_poll(cls):
        """Run ``look_for_work`` through the catalog's blocking poll loop."""
        return cls.catalog.blocking_poll(cls.look_for_work,
                                         polling_seconds=cls.poll_seconds)
Esempio n. 8
0
class PyramidalClient (object):
    """Network client for generating pyramidal tiles.
    """
    ## Derived from the ermrest iobox service client

    def __init__(self, **kwargs):
        """Store the configuration keywords and create the remote clients.

        Every setting is read with ``kwargs.get``, so a keyword that is not
        supplied leaves its attribute as ``None``.  ``baseuri`` is split into
        scheme, host, optional port (kept as a string) and path; the last
        path segment is handed to ``PollingErmrestCatalog`` as its third
        argument.  A ``logger`` is required: it is used before returning.
        """
        fetch = kwargs.get
        self.metadata = fetch("metadata")
        self.baseuri = fetch("baseuri")

        parsed = urlparse.urlparse(self.baseuri)
        netloc_parts = parsed[1].split(":")
        self.scheme = parsed[0]
        self.host = netloc_parts[0]
        self.path = parsed.path
        self.port = netloc_parts[1] if len(netloc_parts) > 1 else None

        # Plain pass-through settings.
        for key in ("dzi", "thumbnails", "czi2dzi", "viewer", "czirules",
                    "showinf", "data_scratch", "cookie"):
            setattr(self, key, fetch(key))

        # Each client gets its own credentials dictionary.
        self.store = HatracStore(self.scheme, self.host,
                                 {'cookie': self.cookie})
        catalog_id = self.path.split('/')[-1]
        self.catalog = PollingErmrestCatalog(self.scheme, self.host,
                                             catalog_id,
                                             {'cookie': self.cookie})

        for key in ("mail_server", "mail_sender", "mail_receiver", "logger"):
            setattr(self, key, fetch(key))
        self.logger.debug('Client initialized.')

    """
    Send email notification
    """
    def sendMail(self, subject, text):
        if self.mail_server and self.mail_sender and self.mail_receiver:
            retry = 0
            ready = False
            while not ready:
                try:
                    msg = MIMEText('%s\n\n%s' % (text, mail_footer), 'plain')
                    msg['Subject'] = subject
                    msg['From'] = self.mail_sender
                    msg['To'] = self.mail_receiver
                    s = smtplib.SMTP(self.mail_server)
                    s.sendmail(self.mail_sender, self.mail_receiver.split(','), msg.as_string())
                    s.quit()
                    self.logger.debug('Sent email notification.')
                    ready = True
                except socket.gaierror as e:
                    if e.errno == socket.EAI_AGAIN:
                        time.sleep(100)
                        retry = retry + 1
                        ready = retry > 10
                    else:
                        ready = True
                    if ready:
                        et, ev, tb = sys.exc_info()
                        self.logger.error('got exception "%s"' % str(ev))
                        self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
                except:
                    et, ev, tb = sys.exc_info()
                    self.logger.error('got exception "%s"' % str(ev))
                    self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
                    ready = True

    """
    Start the process for generating pyramidal tiles
    """
    def start(self):
        try:
            self.processHistologicalImages()
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: unexpected exception', '%s\nThe process might have been stopped\n' % str(traceback.format_exception(et, ev, tb)))
            raise
        
    def processHistologicalImages(self):
        """
        Generate tiles, a thumbnail and an image entry for every pending slide.

        The catalog is asked for Histological_Images:HE_Slide rows, newest
        RCT first.  (Reading the filter as ERMrest syntax: File_Bytes is not
        null, Pyramid_URL is null, and Processing_Status is "in progress" or
        null - TODO confirm against the ERMrest documentation.)

        For each slide the file is downloaded, converted to DZI and given a
        thumbnail.  A failure in any of those steps skips the slide; the
        last two also record the failure in Processing_Status.  On success
        the HE_Slide row is updated and the matching HE_Image row is updated
        or created with the extracted metadata.

        Exceptions raised by the initial query, and any that the helpers do
        not handle themselves, propagate to the caller.
        """
        # The doubled "%%" survives the %-formatting below as a literal "%20".
        url = '/entity/Histological_Images:HE_Slide/!File_Bytes::null::&Pyramid_URL::null::/Processing_Status=in%%20progress;Processing_Status::null::@sort(%s::desc::)' % (urlquote('RCT'))
        
        resp = self.catalog.get(url)
        resp.raise_for_status()
        slides = resp.json()
        # Pull out the needed columns up front; a missing column raises
        # KeyError here, before any slide has been processed.
        slideids = []
        for slide in slides:
            slideids.append((slide['ID'], slide['Filename'], slide['File_URL'], slide['RCT'], slide['File_MD5'], slide['Name'], slide['RID']))
                
        self.logger.debug('Processing %d HistologicalImages slides(s).' % (len(slideids))) 
                
        for slideId,filename,file_url,creation_time,md5,name,rid in slideids:
            self.logger.debug('Generating pyramidal tiles for the file "%s"' % (filename))
            
            """
            Extract the file from hatrac
            """
            f = self.getHatracFile(filename, file_url)
            
            # getHatracFile has already logged and mailed the failure;
            # no status is written to the slide row in this case.
            if f == None:
                continue
            
            """
            Create the directory for the tiles
            """
            # Tiles are stored under <dzi>/<creation year>/<md5>.
            year = parse(creation_time).strftime("%Y")
            outdir = '%s/%s/%s' % (self.dzi, year, md5)
            if not os.path.exists(outdir):
                os.makedirs(outdir)
            
            """
            Convert the file to DZI
            """
            returncode = self.convert2dzi(f, outdir)
            
            if returncode != 0:
                """
                Update the slide table with the failure result.
                """
                self.updateAttributes('Histological_Images:HE_Slide',
                                     rid,
                                     ["Thumbnail", "Processing_Status"],
                                     {'RID': rid,
                                      'Thumbnail': '/thumbnails/generic/generic_genetic.png',
                                      'Processing_Status': 'czi2dzi error'
                                      })
                continue
            
            """
            Generate the thumbnail
            """
            # urls is the '&'-joined "url=..." query string for the viewer.
            thumbnail,urls = self.writeThumbnailImage(f, year, md5)
            
            # writeThumbnailImage returns (None, None) on failure and has
            # already removed the downloaded file in that case.
            if thumbnail == None:
                """
                Update the slide table with the failure result.
                """
                self.updateAttributes('Histological_Images:HE_Slide',
                                     rid,
                                     ["Thumbnail", "Processing_Status"],
                                     {'RID': rid,
                                      'Thumbnail': '/thumbnails/generic/generic_genetic.png',
                                      'Processing_Status': 'DZI failure'
                                      })
                continue
                
            """
            Extract the metadata
            """
            self.logger.debug('Extracting metadata for filename "%s"' % (filename)) 
            bioformatsClient = BioformatsClient(showinf=self.showinf, \
                                                czirules=self.czirules, \
                                                cziFile=f, \
                                                logger=self.logger)
            try:
                metadata = bioformatsClient.getMetadata()
                if metadata == None:
                    metadata = {}
                self.logger.debug('Metadata: "%s"' % str(metadata)) 
                # Removes 'temp.xml' from the current working directory;
                # presumably written by getMetadata - TODO confirm.
                os.remove('temp.xml')
            except XMLSyntaxError:
                # Unparsable metadata is not fatal: report it and continue
                # with an empty metadata dictionary.
                et, ev, tb = sys.exc_info()
                self.logger.error('got unexpected exception "%s"' % str(ev))
                self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
                self.sendMail('FAILURE Tiles: XMLSyntaxError', '%s\n' % str(traceback.format_exception(et, ev, tb)))
                metadata = {}
                    
            # The downloaded source file is no longer needed.
            os.remove(f)
            
            """
            Update the slide table with the success result.
            """
            self.updateAttributes('Histological_Images:HE_Slide',
                                 rid,
                                 ["Thumbnail","Pyramid_URL","Processing_Status","uri"],
                                 {'RID': rid,
                                   'Thumbnail': thumbnail,
                                   'Pyramid_URL': '/%s?%s' % (self.viewer, urls),
                                   'uri': '/%s?%s' % (self.viewer, urls),
                                   "Processing_Status": 'success'
                                  })
            
            self.logger.debug('SUCCEEDED created the tiles directory for the file "%s".' % (filename)) 
            
            """
            Update/Create the image entry with the metadata
            """
            obj = {}
            obj['ID'] = slideId
            obj['Name'] = name
            # The %d format requires slideId to be a number.
            obj['url'] = '/chaise/viewer/#2/Histological_Images:HE_Slide/ID=%d' % slideId
            columns = ['ID', 'Name', 'url']
            # Copy only the configured metadata columns that have a value.
            for col in self.metadata:
                if col in metadata and metadata[col] != None:
                    columns.append(col)
                    obj[col] = metadata[col]
                    
            """
            Check if we have an update or create
            """
            # rid is rebound here to the HE_Image row's RID; the for loop
            # assigns the next slide's RID on the following iteration.
            rid = self.getRID('Histological_Images:HE_Image', 'ID=%d' % slideId)
            if rid != None:
                obj['RID'] = rid
                self.updateAttributes('Histological_Images:HE_Image',
                                     rid,
                                     columns,
                                     obj
                                      )
            else:
                self.createEntity('Histological_Images:HE_Image', obj)
                
            self.logger.debug('SUCCEEDED created the image entry for the file "%s".' % (filename)) 
            
        self.logger.debug('Ended HistologicalImages Slides Processing.') 
        
    """
    Extract the file from hatrac
    """
    def getHatracFile(self, filename, file_url):
        try:
            hatracFile = '%s/%s' % (self.data_scratch, filename)
            self.store.get_obj(file_url, destfilename=hatracFile)
            self.logger.debug('File "%s", %d bytes.' % (hatracFile, os.stat(hatracFile).st_size)) 
            return hatracFile
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: write thumbnail ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
            return None

    """
    Generate the thumbnail
    """
    def writeThumbnailImage(self, filename, year, md5):
        try:
            scanDir='%s/%s/%s' % (self.dzi, year, md5)
            channels = []
            for channel in os.listdir(scanDir):
                if os.path.isdir('%s%s%s' % (scanDir, os.sep, channel)):
                   channels.append( channel)
            outdir = '%s/%s' % (self.thumbnails, year)
            if not os.path.exists(outdir):
                os.makedirs(outdir)
            shutil.copyfile('%s/%s/%s/%s/0/0_0.jpg' % (self.dzi, year, md5, channels[0]), '%s/%s.jpg' % (outdir, md5))
            thumbnail = '/thumbnails/%s/%s.jpg' % (urlquote(year), urlquote(md5))
            urls = []
            for channel in channels:
                urls.append('url=/data/%s/%s/%s/ImageProperties.xml' % (year, md5, channel))
            return (thumbnail, '&'.join(urls))
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: write thumbnail ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
            os.remove(filename)
            return (None, None)
            
    """
    Convert the input file to DZI
    """
    def convert2dzi(self, filename, outdir):
        try:
            currentDirectory=os.getcwd()
            os.chdir(self.dzi)
            args = [self.czi2dzi, filename, outdir]
            p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdoutdata, stderrdata = p.communicate()
            returncode = p.returncode
            os.chdir(currentDirectory)
            
            if returncode != 0:
                self.logger.error('Can not convert czi to dzi for file "%s".\nstdoutdata: %s\nstderrdata: %s\n' % (filename, stdoutdata, stderrdata)) 
                self.sendMail('FAILURE Tiles', 'Can not convert czi to dzi for file "%s".\nstdoutdata: %s\nstderrdata: %s\n' % (filename, stdoutdata, stderrdata))
                os.remove(filename)
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: czi2dzi ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
            os.chdir(currentDirectory)
            self.logger.error('Can not generate pyramidal tiles for the file "%s".\nstdoutdata: %s\nstderrdata: %s\n' % (filename, stdoutdata, stderrdata)) 
            self.sendMail('FAILURE Tiles', 'Can not generate pyramidal tiles for the file "%s".\nstdoutdata: %s\nstderrdata: %s\n' % (filename, stdoutdata, stderrdata))
            returncode = 1
            
        return returncode
            
        
    """
    Update the ermrest attributes
    """
    def updateAttributes (self, path, rid, columns, row):
        """
        Update the ermrest attributes with the row values.
        """
        try:
            columns = ','.join([urlquote(col) for col in columns])
            url = '/attributegroup/%s/RID;%s' % (path, columns)
            resp = self.catalog.put(
                url,
                json=[row]
            )
            resp.raise_for_status()
            self.logger.debug('SUCCEEDED updated the table "%s" for the RID "%s"  with "%s".' % (path, rid, json.dumps(row, indent=4))) 
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: reportFailure ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
            
    """
    Insert a row in a table
    """
    def createEntity (self, path, row):
        """
        Insert the row in the table.
        """
        try:
            url = '/entity/%s' % (path)
            resp = self.catalog.post(
                url,
                json=[row]
            )
            resp.raise_for_status()
            self.logger.debug('SUCCEEDED created in the table "%s" the entry "%s".' % (path, json.dumps(row, indent=4))) 
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: reportFailure ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))

    """
    Check if an entry exist in the table
    Return the RID if it exists, and None otherwise
    """
    def getRID (self, path, predicate):
        """
        Get the RID of the row.
        """
        try:
            RID = None
            url = '/entity/%s/%s' % (path, predicate)
            resp = self.catalog.get(url)
            resp.raise_for_status()
            rows = resp.json()
            if len(rows) == 1:
                RID = rows[0]['RID']
            self.logger.debug('RID for the url = "%s" is "%s".' % (url, RID)) 
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Tiles: reportFailure ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
        
        return RID
Esempio n. 9
0
class YouTubeClient (object):
    """
    Network client for YouTube.
    """
    ## Derived from the ermrest iobox service client

    def __init__(self, **kwargs):
        """Store the configuration keywords and create the catalog client.

        Every setting is read with ``kwargs.get``, so a keyword that is not
        supplied leaves its attribute as ``None``.  ``baseuri`` is split into
        scheme, host, optional port (kept as a string) and path; the last
        path segment is handed to ``PollingErmrestCatalog`` as its third
        argument.  A ``logger`` is required: it is used before returning.
        """
        fetch = kwargs.get
        self.baseuri = fetch("baseuri")

        parsed = urlparse.urlparse(self.baseuri)
        netloc_parts = parsed[1].split(":")
        self.scheme = parsed[0]
        self.host = netloc_parts[0]
        self.path = parsed.path
        self.port = netloc_parts[1] if len(netloc_parts) > 1 else None

        for key in ("cookie", "client_secrets_file", "client_oauth2_file"):
            setattr(self, key, fetch(key))

        catalog_id = self.path.split('/')[-1]
        self.catalog = PollingErmrestCatalog(self.scheme, self.host,
                                             catalog_id,
                                             {'cookie': self.cookie})

        for key in ("mail_server", "mail_sender", "mail_receiver", "logger"):
            setattr(self, key, fetch(key))
        self.logger.debug('Delete YouTube Client initialized.')

    """
    Send email notification
    """
    def sendMail(self, subject, text):
        if self.mail_server and self.mail_sender and self.mail_receiver:
            retry = 0
            ready = False
            while not ready:
                try:
                    msg = MIMEText('%s\n\n%s' % (text, mail_footer), 'plain')
                    msg['Subject'] = subject
                    msg['From'] = self.mail_sender
                    msg['To'] = self.mail_receiver
                    s = smtplib.SMTP(self.mail_server)
                    s.sendmail(self.mail_sender, self.mail_receiver.split(','), msg.as_string())
                    s.quit()
                    self.logger.debug('Sent email notification.')
                    ready = True
                except socket.gaierror as e:
                    if e.errno == socket.EAI_AGAIN:
                        time.sleep(100)
                        retry = retry + 1
                        ready = retry > 10
                    else:
                        ready = True
                    if ready:
                        et, ev, tb = sys.exc_info()
                        self.logger.error('got exception "%s"' % str(ev))
                        self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
                except:
                    et, ev, tb = sys.exc_info()
                    self.logger.error('got exception "%s"' % str(ev))
                    self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
                    ready = True

    """
    Start the process for deleting files from YouTube
    """
    def start(self):
        try:
            self.deleteFromYouTube()
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Delete YouTube: unexpected exception', '%s\nThe process might have been stopped\n' % str(traceback.format_exception(et, ev, tb)))
            raise
        
    """
    Get the YouTube Delete credentials
    """
    def youtube_authenticated_service(self):
        """Build the YouTube API client and store it in ``self.youtube``.

        Credentials are read from the ``client_oauth2_file`` storage; when
        none are stored or they are marked invalid, the console flow built
        from ``client_secrets_file`` is run to obtain new ones.
        """
        oauth_flow = InstalledAppFlow.from_client_secrets_file(self.client_secrets_file, SCOPES)
        stored = Storage(self.client_oauth2_file).get()
        if stored is None or stored.invalid:
            stored = oauth_flow.run_console()
        self.youtube = build(API_SERVICE_NAME, API_VERSION, credentials = stored)

    """
    Delete a video from YouTube
    """
    def youtube_delete(self, youtube_uri):
        res = False
        try:
            id = youtube_uri[youtube_uri.rfind('/')+1:youtube_uri.index('?')]
            self.logger.debug('Deleting YouTube video id="%s".' % (id)) 
            self.youtube_authenticated_service()
            if self.youtube is not None:
                self.logger.debug('Authenticated to the YouTube delete service.')
                response = self.youtube.videos().delete(id=id).execute()
                self.logger.debug('Deleted response %s.' % (response)) 
                res = True
            else:
                self.logger.debug('Authentication for deleting a YouTube video failed.') 
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got YouTube exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            
        return res
        
    """
    Delete videos from YouTube
    """
    def deleteFromYouTube(self):
        url = '/entity/Common:Delete_Youtube/Youtube_Deleted=FALSE/Processing_Status=in%20progress;Processing_Status::null::' 
        resp = self.catalog.get(url)
        resp.raise_for_status()
        files = resp.json()
        fileids = []
        for f in files:
            fileids.append((f['YouTube_URI'], f['RID']))
                
        self.logger.debug('Deleting from YouTube %d videos(s).' % (len(fileids))) 
        for youtube_uri,rid in fileids:
            try:
                youtube_deleted = self.youtube_delete(youtube_uri)
                if youtube_deleted == True:
                    self.logger.debug('SUCCEEDED deleted from YouTube the video with the URL: "%s".' % (youtube_uri)) 
                    columns = ["Youtube_Deleted", "Processing_Status"]
                    columns = ','.join([urlquote(col) for col in columns])
                    url = '/attributegroup/Common:Delete_Youtube/RID;%s' % (columns)
                    obj = {'RID': rid,
                           'Youtube_Deleted': True,
                           'Processing_Status': 'success'
                           }
                    self.catalog.put(
                        url,
                        json=[obj]
                    )
                    self.logger.debug('SUCCEEDED updated the Common:Delete_Youtube table entry for the YouTube URL: "%s".' % (youtube_uri)) 
                else:
                    self.logger.debug('Failure in deleting from YouTube the video with the URL: "%s".' % (youtube_uri)) 
                    self.sendMail('FAILURE Delete YouTube: YouTube Failure', 'The video "%s" could not be deleted from Youtube.' % youtube_uri)
                    self.reportFailure(rid, 'YouTube Failure')
            except Exception as e:
                et, ev, tb = sys.exc_info()
                self.logger.error('got exception "%s"' % str(ev))
                self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
                self.reportFailure(rid, str(e))
        
        
    """
    Update the Delete_Youtube table with the ERROR status
    """
    def reportFailure(self, rid, error_message):
        """
            Update the Delete_Youtube table with the failure result.
        """
        try:
            columns = ["Processing_Status"]
            columns = ','.join([urlquote(col) for col in columns])
            url = '/attributegroup/Common:Delete_Youtube/RID;%s' % (columns)
            obj = {'RID': rid,
                   'Processing_Status': '%s' % error_message
                   }
            self.catalog.put(
                url,
                json=[obj]
            )
            self.logger.debug('SUCCEEDED updated the Delete_Youtube table for the RID "%s"  with the Processing_Status result "%s".' % (rid, error_message)) 
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Delete YouTube: reportFailure ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
Esempio n. 10
0
class PyramidalClient(object):
    """Network client for generating pyramidal tiles.
    """

    ## Derived from the ermrest iobox service client

    def __init__(self, **kwargs):
        """Store the configuration keywords and create the remote clients.

        Every setting is read with ``kwargs.get``, so a keyword that is not
        supplied leaves its attribute as ``None``.  ``baseuri`` is split into
        scheme, host, optional port (kept as a string) and path; the last
        path segment is handed to ``PollingErmrestCatalog`` as its third
        argument.  A ``logger`` is required: it is used before returning.
        """
        fetch = kwargs.get
        self.metadata = fetch("metadata")
        self.baseuri = fetch("baseuri")

        parsed = urlparse.urlparse(self.baseuri)
        netloc_parts = parsed[1].split(":")
        self.scheme = parsed[0]
        self.host = netloc_parts[0]
        self.path = parsed.path
        self.port = netloc_parts[1] if len(netloc_parts) > 1 else None

        # Plain pass-through settings.
        for key in ("dzi", "thumbnails", "czi2dzi", "viewer", "czirules",
                    "showinf", "data_scratch", "cookie"):
            setattr(self, key, fetch(key))

        # Each client gets its own credentials dictionary.
        self.store = HatracStore(self.scheme, self.host,
                                 {'cookie': self.cookie})
        catalog_id = self.path.split('/')[-1]
        self.catalog = PollingErmrestCatalog(self.scheme, self.host,
                                             catalog_id,
                                             {'cookie': self.cookie})

        for key in ("mail_server", "mail_sender", "mail_receiver", "logger"):
            setattr(self, key, fetch(key))
        self.logger.debug('Client initialized.')

    """
    Send email notification
    """

    def sendMail(self, subject, text):
        if self.mail_server and self.mail_sender and self.mail_receiver:
            retry = 0
            ready = False
            while not ready:
                try:
                    msg = MIMEText('%s\n\n%s' % (text, mail_footer), 'plain')
                    msg['Subject'] = subject
                    msg['From'] = self.mail_sender
                    msg['To'] = self.mail_receiver
                    s = smtplib.SMTP(self.mail_server)
                    s.sendmail(self.mail_sender, self.mail_receiver.split(','),
                               msg.as_string())
                    s.quit()
                    self.logger.debug('Sent email notification.')
                    ready = True
                except socket.gaierror as e:
                    if e.errno == socket.EAI_AGAIN:
                        time.sleep(100)
                        retry = retry + 1
                        ready = retry > 10
                    else:
                        ready = True
                    if ready:
                        et, ev, tb = sys.exc_info()
                        self.logger.error('got exception "%s"' % str(ev))
                        self.logger.error(
                            '%s' % str(traceback.format_exception(et, ev, tb)))
                except:
                    et, ev, tb = sys.exc_info()
                    self.logger.error('got exception "%s"' % str(ev))
                    self.logger.error(
                        '%s' % str(traceback.format_exception(et, ev, tb)))
                    ready = True

    """
    Start the process for generating pyramidal tiles
    """

    def start(self):
        try:
            self.processHistologicalImages()
        except:
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' %
                              str(traceback.format_exception(et, ev, tb)))
            self.sendMail(
                'FAILURE Tiles: unexpected exception',
                '%s\nThe process might have been stopped\n' %
                str(traceback.format_exception(et, ev, tb)))
            raise

    def processHistologicalImages(self):
        """
        Generate tiles, a thumbnail and an image entry for every pending slide.

        The catalog is asked for Histological_Images:HE_Slide rows, newest
        RCT first.  (Reading the filter as ERMrest syntax: File_Bytes is not
        null, Pyramid_URL is null, and Processing_Status is "in progress" or
        null - TODO confirm against the ERMrest documentation.)

        For each slide the file is downloaded, converted to DZI and given a
        thumbnail.  A failure in any of those steps skips the slide; the
        last two also record the failure in Processing_Status.  On success
        the HE_Slide row is updated and the matching HE_Image row is updated
        or created with the extracted metadata.

        Exceptions raised by the initial query, and any that the helpers do
        not handle themselves, propagate to the caller.
        """
        # The doubled "%%" survives the %-formatting below as a literal "%20".
        url = '/entity/Histological_Images:HE_Slide/!File_Bytes::null::&Pyramid_URL::null::/Processing_Status=in%%20progress;Processing_Status::null::@sort(%s::desc::)' % (
            urlquote('RCT'))

        resp = self.catalog.get(url)
        resp.raise_for_status()
        slides = resp.json()
        # Pull out the needed columns up front; a missing column raises
        # KeyError here, before any slide has been processed.
        slideids = []
        for slide in slides:
            slideids.append(
                (slide['ID'], slide['Filename'], slide['File_URL'],
                 slide['RCT'], slide['File_MD5'], slide['Name'], slide['RID']))

        self.logger.debug('Processing %d HistologicalImages slides(s).' %
                          (len(slideids)))

        for slideId, filename, file_url, creation_time, md5, name, rid in slideids:
            self.logger.debug('Generating pyramidal tiles for the file "%s"' %
                              (filename))
            """
            Extract the file from hatrac
            """
            f = self.getHatracFile(filename, file_url)

            # getHatracFile returns None on failure; no status is written
            # to the slide row in this case.
            if f == None:
                continue
            """
            Create the directory for the tiles
            """
            # Tiles are stored under <dzi>/<creation year>/<md5>.
            year = parse(creation_time).strftime("%Y")
            outdir = '%s/%s/%s' % (self.dzi, year, md5)
            if not os.path.exists(outdir):
                os.makedirs(outdir)
            """
            Convert the file to DZI
            """
            returncode = self.convert2dzi(f, outdir)

            if returncode != 0:
                """
                Update the slide table with the failure result.
                """
                self.updateAttributes(
                    'Histological_Images:HE_Slide', rid,
                    ["Thumbnail", "Processing_Status"], {
                        'RID': rid,
                        'Thumbnail': '/thumbnails/generic/generic_genetic.png',
                        'Processing_Status': 'czi2dzi error'
                    })
                continue
            """
            Generate the thumbnail
            """
            # urls is the query string later appended to the viewer URL.
            thumbnail, urls = self.writeThumbnailImage(f, year, md5)

            # A None thumbnail signals that thumbnail generation failed.
            if thumbnail == None:
                """
                Update the slide table with the failure result.
                """
                self.updateAttributes(
                    'Histological_Images:HE_Slide', rid,
                    ["Thumbnail", "Processing_Status"], {
                        'RID': rid,
                        'Thumbnail': '/thumbnails/generic/generic_genetic.png',
                        'Processing_Status': 'DZI failure'
                    })
                continue
            """
            Extract the metadata
            """
            self.logger.debug('Extracting metadata for filename "%s"' %
                              (filename))
            bioformatsClient = BioformatsClient(showinf=self.showinf, \
                                                czirules=self.czirules, \
                                                cziFile=f, \
                                                logger=self.logger)
            try:
                metadata = bioformatsClient.getMetadata()
                if metadata == None:
                    metadata = {}
                self.logger.debug('Metadata: "%s"' % str(metadata))
                # Removes 'temp.xml' from the current working directory;
                # presumably written by getMetadata - TODO confirm.
                os.remove('temp.xml')
            except XMLSyntaxError:
                # Unparsable metadata is not fatal: report it and continue
                # with an empty metadata dictionary.
                et, ev, tb = sys.exc_info()
                self.logger.error('got unexpected exception "%s"' % str(ev))
                self.logger.error('%s' %
                                  str(traceback.format_exception(et, ev, tb)))
                self.sendMail(
                    'FAILURE Tiles: XMLSyntaxError',
                    '%s\n' % str(traceback.format_exception(et, ev, tb)))
                metadata = {}

            # The downloaded source file is no longer needed.
            os.remove(f)
            """
            Update the slide table with the success result.
            """
            self.updateAttributes(
                'Histological_Images:HE_Slide', rid,
                ["Thumbnail", "Pyramid_URL", "Processing_Status", "uri"], {
                    'RID': rid,
                    'Thumbnail': thumbnail,
                    'Pyramid_URL': '/%s?%s' % (self.viewer, urls),
                    'uri': '/%s?%s' % (self.viewer, urls),
                    "Processing_Status": 'success'
                })

            self.logger.debug(
                'SUCCEEDED created the tiles directory for the file "%s".' %
                (filename))
            """
            Update/Create the image entry with the metadata
            """
            obj = {}
            obj['ID'] = slideId
            obj['Name'] = name
            # The %d format requires slideId to be a number.
            obj['url'] = '/chaise/viewer/#2/Histological_Images:HE_Slide/ID=%d' % slideId
            columns = ['ID', 'Name', 'url']
            # Copy only the configured metadata columns that have a value.
            for col in self.metadata:
                if col in metadata and metadata[col] != None:
                    columns.append(col)
                    obj[col] = metadata[col]
            """
            Check if we have an update or create
            """
            # rid is rebound here to the HE_Image row's RID; the for loop
            # assigns the next slide's RID on the following iteration.
            rid = self.getRID('Histological_Images:HE_Image',
                              'ID=%d' % slideId)
            if rid != None:
                obj['RID'] = rid
                self.updateAttributes('Histological_Images:HE_Image', rid,
                                      columns, obj)
            else:
                self.createEntity('Histological_Images:HE_Image', obj)

            self.logger.debug(
                'SUCCEEDED created the image entry for the file "%s".' %
                (filename))

        self.logger.debug('Ended HistologicalImages Slides Processing.')

    """
    Extract the file from hatrac
    """

    def getHatracFile(self, filename, file_url):
        """
        Fetch an object from hatrac into the scratch directory.

        filename is the name given to the local copy under
        self.data_scratch; file_url is handed to self.store.get_obj().

        Return the path of the local copy, or None if the retrieval
        failed (the failure is logged and mailed).
        """
        hatracFile = '%s/%s' % (self.data_scratch, filename)
        try:
            self.store.get_obj(file_url, destfilename=hatracFile)
            self.logger.debug('File "%s", %d bytes.' %
                              (hatracFile, os.stat(hatracFile).st_size))
            return hatracFile
        except Exception:
            # Exception rather than a bare except, so that
            # KeyboardInterrupt/SystemExit can still stop the service.
            et, ev, tb = sys.exc_info()
            trace = str(traceback.format_exception(et, ev, tb))
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' % trace)
            # The subject names this step; it used to carry the thumbnail
            # writer's subject, which mislabeled download failures.
            self.sendMail('FAILURE Tiles: get hatrac file ERROR',
                          '%s\n' % trace)
            return None

    """
    Generate the thumbnail
    """

    def writeThumbnailImage(self, filename, year, md5):
        """
        Publish a thumbnail for a converted scan and build its viewer URLs.

        Every sub-directory of <self.dzi>/<year>/<md5> is treated as a
        channel.  The 0/0_0.jpg tile of the first channel listed is copied
        to <self.thumbnails>/<year>/<md5>.jpg.

        Return a tuple (thumbnail, urls): the thumbnail web path and the
        '&'-joined 'url=...' query arguments, one per channel.  On any
        failure the error is logged and mailed, the input file `filename`
        is removed if it is still present, and (None, None) is returned.
        """
        try:
            scanDir = '%s/%s/%s' % (self.dzi, year, md5)
            channels = [
                entry for entry in os.listdir(scanDir)
                if os.path.isdir('%s%s%s' % (scanDir, os.sep, entry))
            ]
            outdir = '%s/%s' % (self.thumbnails, year)
            if not os.path.exists(outdir):
                os.makedirs(outdir)
            # channels[0] raises IndexError when no channel directory
            # exists; that is reported through the handler below.
            shutil.copyfile(
                '%s/%s/%s/%s/0/0_0.jpg' % (self.dzi, year, md5, channels[0]),
                '%s/%s.jpg' % (outdir, md5))
            thumbnail = '/thumbnails/%s/%s.jpg' % (urlquote(year),
                                                   urlquote(md5))
            urls = [
                'url=/data/%s/%s/%s/ImageProperties.xml' % (year, md5, entry)
                for entry in channels
            ]
            return (thumbnail, '&'.join(urls))
        except Exception:
            # Exception rather than a bare except, so that
            # KeyboardInterrupt/SystemExit can still stop the service.
            et, ev, tb = sys.exc_info()
            trace = str(traceback.format_exception(et, ev, tb))
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' % trace)
            self.sendMail('FAILURE Tiles: write thumbnail ERROR',
                          '%s\n' % trace)
            # The input may already be gone; removing it blindly would
            # raise from inside the handler and break the (None, None)
            # contract.
            if os.path.exists(filename):
                os.remove(filename)
            return (None, None)

    """
    Convert the input file to DZI
    """

    def convert2dzi(self, filename, outdir):
        """
        Run the czi2dzi converter on a file from inside self.dzi.

        The command executed is [self.czi2dzi, filename, outdir], with the
        process working directory temporarily switched to self.dzi.

        Return the converter's exit code.  A non-zero exit code is logged
        and mailed and the input file is removed.  If the converter could
        not be run at all, the exception is logged and mailed and 1 is
        returned.
        """
        # Bound up front so the failure report below can always be
        # formatted, even when the converter never started.
        stdoutdata = None
        stderrdata = None
        try:
            currentDirectory = os.getcwd()
            os.chdir(self.dzi)
            try:
                args = [self.czi2dzi, filename, outdir]
                p = subprocess.Popen(args,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
                stdoutdata, stderrdata = p.communicate()
                returncode = p.returncode
            finally:
                # Always restore the working directory, whether or not
                # the converter could be started.
                os.chdir(currentDirectory)

            if returncode != 0:
                self.logger.error(
                    'Can not convert czi to dzi for file "%s".\nstdoutdata: %s\nstderrdata: %s\n'
                    % (filename, stdoutdata, stderrdata))
                self.sendMail(
                    'FAILURE Tiles',
                    'Can not convert czi to dzi for file "%s".\nstdoutdata: %s\nstderrdata: %s\n'
                    % (filename, stdoutdata, stderrdata))
                os.remove(filename)
        except Exception:
            # Exception rather than a bare except, so that
            # KeyboardInterrupt/SystemExit can still stop the service.
            et, ev, tb = sys.exc_info()
            trace = str(traceback.format_exception(et, ev, tb))
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' % trace)
            self.sendMail('FAILURE Tiles: czi2dzi ERROR', '%s\n' % trace)
            self.logger.error(
                'Can not generate pyramidal tiles for the file "%s".\nstdoutdata: %s\nstderrdata: %s\n'
                % (filename, stdoutdata, stderrdata))
            self.sendMail(
                'FAILURE Tiles',
                'Can not generate pyramidal tiles for the file "%s".\nstdoutdata: %s\nstderrdata: %s\n'
                % (filename, stdoutdata, stderrdata))
            returncode = 1

        return returncode

    """
    Update the ermrest attributes
    """

    def updateAttributes(self, path, rid, columns, row):
        """
        Write the given columns of one row back to an ermrest table.

        path names the table, rid is only used in the log message, columns
        lists the column names to update and row carries their values (it
        is sent as a one-element JSON list, keyed on RID).

        Nothing is returned; any failure is logged and mailed.
        """
        try:
            quoted = ','.join(urlquote(name) for name in columns)
            target = '/attributegroup/%s/RID;%s' % (path, quoted)
            response = self.catalog.put(target, json=[row])
            response.raise_for_status()
            pretty = json.dumps(row, indent=4)
            self.logger.debug(
                'SUCCEEDED updated the table "%s" for the RID "%s"  with "%s".'
                % (path, rid, pretty))
        except:
            exc_type, exc_value, exc_tb = sys.exc_info()
            trace = str(
                traceback.format_exception(exc_type, exc_value, exc_tb))
            self.logger.error('got exception "%s"' % str(exc_value))
            self.logger.error('%s' % trace)
            self.sendMail('FAILURE Tiles: reportFailure ERROR',
                          '%s\n' % trace)

    """
    Insert a row in a table
    """

    def createEntity(self, path, row):
        """
        Add one row to an ermrest table.

        path names the table; row is posted as a one-element JSON list.
        Nothing is returned; any failure is logged and mailed.
        """
        try:
            target = '/entity/%s' % (path)
            response = self.catalog.post(target, json=[row])
            response.raise_for_status()
            pretty = json.dumps(row, indent=4)
            self.logger.debug(
                'SUCCEEDED created in the table "%s" the entry "%s".' %
                (path, pretty))
        except:
            exc_type, exc_value, exc_tb = sys.exc_info()
            trace = str(
                traceback.format_exception(exc_type, exc_value, exc_tb))
            self.logger.error('got exception "%s"' % str(exc_value))
            self.logger.error('%s' % trace)
            self.sendMail('FAILURE Tiles: reportFailure ERROR',
                          '%s\n' % trace)

    """
    Check if an entry exists in the table
    Return the RID if it exists, and None otherwise
    """

    def getRID(self, path, predicate):
        """
        Look up the RID of the row selected by a predicate.

        path names the table and predicate is the filter appended to the
        entity URL.  The RID is returned only when exactly one row
        matches; otherwise, or when the lookup fails (which is logged and
        mailed), None is returned.
        """
        RID = None
        try:
            lookup = '/entity/%s/%s' % (path, predicate)
            response = self.catalog.get(lookup)
            response.raise_for_status()
            matches = response.json()
            if len(matches) == 1:
                RID = matches[0]['RID']
            self.logger.debug('RID for the url = "%s" is "%s".' %
                              (lookup, RID))
        except:
            exc_type, exc_value, exc_tb = sys.exc_info()
            trace = str(
                traceback.format_exception(exc_type, exc_value, exc_tb))
            self.logger.error('got exception "%s"' % str(exc_value))
            self.logger.error('%s' % trace)
            self.sendMail('FAILURE Tiles: reportFailure ERROR',
                          '%s\n' % trace)

        return RID
Esempio n. 11
0
class HatracClient(object):
    """
    Network client for hatrac.

    Reads the pending rows of the ermrest table Common:Delete_Hatrac,
    deletes the hatrac object each row names and records the outcome on
    the row.  Derived from the ermrest iobox service client.
    """

    def __init__(self, **kwargs):
        """
        Build the hatrac store and ermrest catalog bindings.

        Keyword arguments read: baseuri, cookie, mail_server, mail_sender,
        mail_receiver and logger.  The scheme and host come from baseuri;
        the last path segment of baseuri is passed to the catalog binding.
        """
        self.baseuri = kwargs.get("baseuri")
        o = urlparse.urlparse(self.baseuri)
        self.scheme = o[0]
        host_port = o[1].split(":")
        self.host = host_port[0]
        self.path = o.path
        self.port = None
        if len(host_port) > 1:
            self.port = host_port[1]
        self.cookie = kwargs.get("cookie")
        self.store = HatracStore(
            self.scheme,
            self.host,
            {'cookie': self.cookie}
        )
        self.catalog = PollingErmrestCatalog(
            self.scheme,
            self.host,
            self.path.split('/')[-1],
            {'cookie': self.cookie}
        )
        self.mail_server = kwargs.get("mail_server")
        self.mail_sender = kwargs.get("mail_sender")
        self.mail_receiver = kwargs.get("mail_receiver")
        self.logger = kwargs.get("logger")
        self.logger.debug('Hatrac Client initialized.')

    def sendMail(self, subject, text):
        """
        Send an email notification.

        Does nothing unless mail_server, mail_sender and mail_receiver are
        all set.  mail_receiver may hold several comma-separated
        addresses.  A temporary name-resolution failure (EAI_AGAIN) is
        retried after a 100 second pause; once more than 10 such retries
        have failed the error is logged and the mail is dropped.  Any
        other error is logged and the mail is dropped.
        """
        if not (self.mail_server and self.mail_sender and self.mail_receiver):
            return
        retry = 0
        ready = False
        while not ready:
            try:
                msg = MIMEText('%s\n\n%s' % (text, mail_footer), 'plain')
                msg['Subject'] = subject
                msg['From'] = self.mail_sender
                msg['To'] = self.mail_receiver
                s = smtplib.SMTP(self.mail_server)
                try:
                    s.sendmail(self.mail_sender, self.mail_receiver.split(','), msg.as_string())
                    s.quit()
                except Exception:
                    # Do not leak the connection when the send fails; the
                    # error itself is reported by the handlers below.
                    s.close()
                    raise
                self.logger.debug('Sent email notification.')
                ready = True
            except socket.gaierror as e:
                if e.errno == socket.EAI_AGAIN:
                    time.sleep(100)
                    retry = retry + 1
                    ready = retry > 10
                else:
                    ready = True
                if ready:
                    et, ev, tb = sys.exc_info()
                    self.logger.error('got exception "%s"' % str(ev))
                    self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            except Exception:
                # Exception rather than a bare except, so that
                # KeyboardInterrupt/SystemExit can still stop the process.
                et, ev, tb = sys.exc_info()
                self.logger.error('got exception "%s"' % str(ev))
                self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
                ready = True

    def start(self):
        """
        Start the process for deleting files from hatrac.

        Any exception escaping deleteFromHatrac() is logged, mailed and
        then re-raised to the caller.
        """
        try:
            self.deleteFromHatrac()
        except:
            # Catch-all is deliberate here: the exception is re-raised.
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Delete Hatrac: unexpected exception', '%s\nThe process might have been stopped\n' % str(traceback.format_exception(et, ev, tb)))
            raise

    def deleteFromHatrac(self):
        """
        Delete from hatrac every object listed as pending.

        Pending rows are those of Common:Delete_Hatrac with
        Hatrac_Deleted=FALSE whose Processing_Status is 'in progress' or
        null.  Each successful deletion marks its row as deleted with
        status 'success'; a failure is logged and recorded on the row via
        reportFailure(), and the remaining rows are still processed.

        Errors while fetching the pending rows propagate to the caller.
        """
        url = '/entity/Common:Delete_Hatrac/Hatrac_Deleted=FALSE/Processing_Status=in%20progress;Processing_Status::null::'
        resp = self.catalog.get(url)
        resp.raise_for_status()
        fileids = [(f['Hatrac_URI'], f['RID']) for f in resp.json()]

        # The update target is the same for every row; build it once.
        columns = ','.join([urlquote(col) for col in ["Hatrac_Deleted", "Processing_Status"]])
        update_url = '/attributegroup/Common:Delete_Hatrac/RID;%s' % (columns)

        self.logger.debug('Deleting from hatrac %d files(s).' % (len(fileids)))
        for hatrac_uri, rid in fileids:
            try:
                self.store.del_obj(hatrac_uri)
                self.logger.debug('SUCCEEDED deleted from hatrac the "%s" file.' % (hatrac_uri))
                obj = {'RID': rid,
                       'Hatrac_Deleted': True,
                       'Processing_Status': 'success'
                       }
                self.catalog.put(
                    update_url,
                    json=[obj]
                )
                self.logger.debug('SUCCEEDED updated the Common:Delete_Hatrac table entry for the Hatrac URL: "%s".' % (hatrac_uri))
            except Exception as e:
                et, ev, tb = sys.exc_info()
                self.logger.error('got exception "%s"' % str(ev))
                self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
                self.reportFailure(rid, str(e))

    def reportFailure(self, rid, error_message):
        """
        Update the Delete_Hatrac table with the failure result.

        The Processing_Status of the row identified by rid is set to
        error_message.  If that update fails too, the error is logged and
        mailed; nothing is raised.
        """
        try:
            columns = ','.join([urlquote(col) for col in ["Processing_Status"]])
            url = '/attributegroup/Common:Delete_Hatrac/RID;%s' % (columns)
            obj = {'RID': rid,
                   'Processing_Status': '%s' % error_message
                   }
            self.catalog.put(
                url,
                json=[obj]
            )
            self.logger.debug('SUCCEEDED updated the Delete_Hatrac table for the RID "%s"  with the Processing_Status result "%s".' % (rid, error_message))
        except Exception:
            # Exception rather than a bare except, so that
            # KeyboardInterrupt/SystemExit can still stop the process.
            et, ev, tb = sys.exc_info()
            self.logger.error('got exception "%s"' % str(ev))
            self.logger.error('%s' % str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Delete Hatrac: reportFailure ERROR', '%s\n' % str(traceback.format_exception(et, ev, tb)))
Esempio n. 12
0
class HatracClient(object):
    """
    Network client for hatrac.

    Reads the pending rows of the ermrest table Common:Delete_Hatrac,
    deletes the hatrac object each row names and records the outcome on
    the row.  Derived from the ermrest iobox service client.
    """

    def __init__(self, **kwargs):
        """
        Build the hatrac store and ermrest catalog bindings.

        Keyword arguments read: baseuri, cookie, mail_server, mail_sender,
        mail_receiver and logger.  The scheme and host come from baseuri;
        the last path segment of baseuri is passed to the catalog binding.
        """
        self.baseuri = kwargs.get("baseuri")
        o = urlparse.urlparse(self.baseuri)
        self.scheme = o[0]
        host_port = o[1].split(":")
        self.host = host_port[0]
        self.path = o.path
        self.port = None
        if len(host_port) > 1:
            self.port = host_port[1]
        self.cookie = kwargs.get("cookie")
        self.store = HatracStore(self.scheme, self.host,
                                 {'cookie': self.cookie})
        self.catalog = PollingErmrestCatalog(self.scheme, self.host,
                                             self.path.split('/')[-1],
                                             {'cookie': self.cookie})
        self.mail_server = kwargs.get("mail_server")
        self.mail_sender = kwargs.get("mail_sender")
        self.mail_receiver = kwargs.get("mail_receiver")
        self.logger = kwargs.get("logger")
        self.logger.debug('Hatrac Client initialized.')

    def sendMail(self, subject, text):
        """
        Send an email notification.

        Does nothing unless mail_server, mail_sender and mail_receiver are
        all set.  mail_receiver may hold several comma-separated
        addresses.  A temporary name-resolution failure (EAI_AGAIN) is
        retried after a 100 second pause; once more than 10 such retries
        have failed the error is logged and the mail is dropped.  Any
        other error is logged and the mail is dropped.
        """
        if not (self.mail_server and self.mail_sender
                and self.mail_receiver):
            return
        retry = 0
        ready = False
        while not ready:
            try:
                msg = MIMEText('%s\n\n%s' % (text, mail_footer), 'plain')
                msg['Subject'] = subject
                msg['From'] = self.mail_sender
                msg['To'] = self.mail_receiver
                s = smtplib.SMTP(self.mail_server)
                try:
                    s.sendmail(self.mail_sender,
                               self.mail_receiver.split(','),
                               msg.as_string())
                    s.quit()
                except Exception:
                    # Do not leak the connection when the send fails; the
                    # error itself is reported by the handlers below.
                    s.close()
                    raise
                self.logger.debug('Sent email notification.')
                ready = True
            except socket.gaierror as e:
                if e.errno == socket.EAI_AGAIN:
                    time.sleep(100)
                    retry = retry + 1
                    ready = retry > 10
                else:
                    ready = True
                if ready:
                    et, ev, tb = sys.exc_info()
                    self.logger.error('got exception "%s"' % str(ev))
                    self.logger.error(
                        '%s' % str(traceback.format_exception(et, ev, tb)))
            except Exception:
                # Exception rather than a bare except, so that
                # KeyboardInterrupt/SystemExit can still stop the process.
                et, ev, tb = sys.exc_info()
                self.logger.error('got exception "%s"' % str(ev))
                self.logger.error(
                    '%s' % str(traceback.format_exception(et, ev, tb)))
                ready = True

    def start(self):
        """
        Start the process for deleting files from hatrac.

        Any exception escaping deleteFromHatrac() is logged, mailed and
        then re-raised to the caller.
        """
        try:
            self.deleteFromHatrac()
        except:
            # Catch-all is deliberate here: the exception is re-raised.
            et, ev, tb = sys.exc_info()
            self.logger.error('got unexpected exception "%s"' % str(ev))
            self.logger.error('%s' %
                              str(traceback.format_exception(et, ev, tb)))
            self.sendMail(
                'FAILURE Delete Hatrac: unexpected exception',
                '%s\nThe process might have been stopped\n' %
                str(traceback.format_exception(et, ev, tb)))
            raise

    def deleteFromHatrac(self):
        """
        Delete from hatrac every object listed as pending.

        Pending rows are those of Common:Delete_Hatrac with
        Hatrac_Deleted=FALSE whose Processing_Status is 'in progress' or
        null.  Each successful deletion marks its row as deleted with
        status 'success'; a failure is logged and recorded on the row via
        reportFailure(), and the remaining rows are still processed.

        Errors while fetching the pending rows propagate to the caller.
        """
        url = '/entity/Common:Delete_Hatrac/Hatrac_Deleted=FALSE/Processing_Status=in%20progress;Processing_Status::null::'
        resp = self.catalog.get(url)
        resp.raise_for_status()
        fileids = [(f['Hatrac_URI'], f['RID']) for f in resp.json()]

        # The update target is the same for every row; build it once.
        columns = ["Hatrac_Deleted", "Processing_Status"]
        columns = ','.join([urlquote(col) for col in columns])
        update_url = '/attributegroup/Common:Delete_Hatrac/RID;%s' % (columns)

        self.logger.debug('Deleting from hatrac %d files(s).' % (len(fileids)))
        for hatrac_uri, rid in fileids:
            try:
                self.store.del_obj(hatrac_uri)
                self.logger.debug(
                    'SUCCEEDED deleted from hatrac the "%s" file.' %
                    (hatrac_uri))
                obj = {
                    'RID': rid,
                    'Hatrac_Deleted': True,
                    'Processing_Status': 'success'
                }
                self.catalog.put(update_url, json=[obj])
                self.logger.debug(
                    'SUCCEEDED updated the Common:Delete_Hatrac table entry for the Hatrac URL: "%s".'
                    % (hatrac_uri))
            except Exception as e:
                et, ev, tb = sys.exc_info()
                self.logger.error('got exception "%s"' % str(ev))
                self.logger.error('%s' %
                                  str(traceback.format_exception(et, ev, tb)))
                self.reportFailure(rid, str(e))

    def reportFailure(self, rid, error_message):
        """
        Update the Delete_Hatrac table with the failure result.

        The Processing_Status of the row identified by rid is set to
        error_message.  If that update fails too, the error is logged and
        mailed; nothing is raised.
        """
        try:
            columns = ["Processing_Status"]
            columns = ','.join([urlquote(col) for col in columns])
            url = '/attributegroup/Common:Delete_Hatrac/RID;%s' % (columns)
            obj = {'RID': rid, 'Processing_Status': '%s' % error_message}
            self.catalog.put(url, json=[obj])
            self.logger.debug(
                'SUCCEEDED updated the Delete_Hatrac table for the RID "%s"  with the Processing_Status result "%s".'
                % (rid, error_message))
        except Exception:
            # Exception rather than a bare except, so that
            # KeyboardInterrupt/SystemExit can still stop the process.
            et, ev, tb = sys.exc_info()
            self.logger.error('got exception "%s"' % str(ev))
            self.logger.error('%s' %
                              str(traceback.format_exception(et, ev, tb)))
            self.sendMail('FAILURE Delete Hatrac: reportFailure ERROR',
                          '%s\n' % str(traceback.format_exception(et, ev, tb)))