def __init__(self,
                 nas_path=None,
                 rig_path=None,
                 archive_path=None,
                 backup_path=None):
        self._nas_path = None
        self._rig_path = None
        self._archive_path = None
        self._backup_path = None

        if nas_path is not None:
            self._nas_path = nas_path
            self.site_dh = getDirHandle(nas_path)
        elif rig_path is not None:
            self._rig_path = rig_path
            self.site_dh = getDirHandle(rig_path)
        elif archive_path is not None:
            self._archive_path = archive_path
            self.site_dh = getDirHandle(archive_path)
        elif backup_path is not None:
            self._backup_path = backup_path
            self.site_dh = getDirHandle(backup_path)
        else:
            raise ValueError("must provide one of nas_path, rig_path, archive_path, or backup_path")

        self.site_info = self.site_dh.info()
        self._slice_info = None
        self._expt_info = None
        self._specimen_info = None
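
# A minimal usage sketch for the constructor above (the class appears as
# ExperimentMetadata in later examples). Exactly one storage location is
# passed and becomes self.site_dh; the path below is hypothetical:
#
#     meta = ExperimentMetadata(nas_path='/synphys/2017.10.01_000/slice_000/site_000')
#     print(meta.site_info['__timestamp__'])
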
    @classmethod
    def create_db_entries(cls, job_id, session):
        slices = all_slices()
        path = slices[job_id]
        dh = getDirHandle(path)
        info = dh.info()
        parent_info = dh.parent().info()
        
        # pull some metadata from LIMS
        sid = info['specimen_ID'].strip()
        limsdata = lims.specimen_info(sid)

        # 'slice quality' is a free-form index field; coerce to int when possible
        quality = info.get('slice quality', None)
        try:
            quality = int(quality)
        except Exception:
            quality = None

        # Interpret slice time
        slice_time = parent_info.get('time_of_dissection', None)
        if slice_time == '':
            slice_time = None
        if slice_time is not None:
            m = re.match(r'((20\d\d)-(\d{1,2})-(\d{1,2}) )?(\d+):(\d+)', slice_time.strip())
            if m is not None:
                _, year, mon, day, hh, mm = m.groups()
                if year is None:
                    date = datetime.fromtimestamp(dh.parent().info()['__timestamp__'])
                    slice_time = datetime(date.year, date.month, date.day, int(hh), int(mm))
                else:
                    slice_time = datetime(int(year), int(mon), int(day), int(hh), int(mm))

        # construct the full genotype string
        genotype = limsdata['genotype'] or ''
        # check both the day (parent) and slice metadata for virus injections
        for meta in (parent_info, info):
            inj = meta.get('injections')
            if inj in (None, ''):
                continue
            if inj not in constants.INJECTIONS:
                raise KeyError("Injection %r is unknown in constants.INJECTIONS" % inj)
            genotype = ';'.join(genotype.split(';') + [constants.INJECTIONS[inj]])

        fields = {
            'acq_timestamp': info['__timestamp__'],
            'species': limsdata['organism'],
            'date_of_birth': limsdata['date_of_birth'],
            'age': limsdata['age'],
            'sex': limsdata['sex'],
            'genotype': genotype,
            'orientation': limsdata['plane_of_section'],
            'surface': limsdata['exposed_surface'],
            'hemisphere': limsdata['hemisphere'],
            'quality': quality,
            'slice_time': slice_time,
            'slice_conditions': {},
            'lims_specimen_name': sid,
            'storage_path': dh.name(relativeTo=dh.parent().parent()),
        }

        sl = db.Slice(**fields)
        session.add(sl)
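
# For reference, the time_of_dissection parser above accepts either a bare
# clock time or a full date and time; a self-contained check of both cases
# using the same pattern:
import re

pattern = r'((20\d\d)-(\d{1,2})-(\d{1,2}) )?(\d+):(\d+)'

print(re.match(pattern, '2017-10-01 15:20').groups())
# -> ('2017-10-01 ', '2017', '10', '01', '15', '20')

print(re.match(pattern, '15:20').groups())
# -> (None, None, None, None, '15', '20')  (date falls back to __timestamp__)
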
Example #3
    def _get_raw_paths(self):
        expt_subpath = self.expt_subpath

        # find the local primary/archive paths that contain this experiment
        found_paths = False
        rig_name = self.rig_name
        if rig_name is None:
            return
        rig_data_paths = config.rig_data_paths.get(rig_name, [])
        for path_set in rig_data_paths:
            for root in path_set.values():
                test_path = os.path.join(root, expt_subpath)
                if not os.path.isdir(test_path):
                    continue
                dh = getDirHandle(test_path)
                if self.site_info is None:
                    raise Exception('%s %s missing index file' % (self, self.path))
                if dh.info()['__timestamp__'] == self.site_info['__timestamp__']:
                    found_paths = True
                    # set self._primary_path, self._archive_path, etc.
                    for k, v in path_set.items():
                        setattr(self, '_' + k + '_path', os.path.join(v, expt_subpath))
                    break
            if found_paths:
                break
Example #4
def find_submittable_expts():
    """Search synphys data storage for experiments that are ready to be submitted to LIMS.
    """

    # Start by finding all site paths that have an nwb file and a file_manifest.yml
    all_nwbs = glob.glob(
        os.path.join(config.synphys_data, '*', 'slice_*', 'site_*', '*.nwb'))
    all_sites = OrderedDict()
    for nwb in all_nwbs:
        path = os.path.dirname(nwb)
        if 'file_manifest.yml' in os.listdir(path):
            all_sites[path] = 1   # OrderedDict used as an ordered set

    # filter out anything that has been submitted already
    ready_sites = []
    for path in all_sites.keys():
        site_dh = getDirHandle(path)
        acq_ts = site_dh.info()['__timestamp__']
        spec_name = site_dh.parent().info()['specimen_ID'].strip()
        spec_id = lims.specimen_id_from_name(spec_name)
        subs = lims.expt_submissions(spec_id, acq_ts)
        if len(subs) == 0:
            ready_sites.append(site_dh)

    return ready_sites
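
# A minimal usage sketch for find_submittable_expts(), assuming config and
# lims are set up as in the rest of these examples:
for site_dh in find_submittable_expts():
    print("ready for LIMS:", site_dh.name())
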
def sync_experiment(site_dir):
    dh = getDirHandle(site_dir)
    sub = RawDataSubmission(dh)
    err, warn = sub.check()
    if len(err) > 0:
        return [], err, warn
    sub.submit()
    return sub.changes, err, warn
Example #6
    def __init__(self, path=None):
        Experiment.__init__(self, verify=False)
        self._site_path = path

        self.site_dh = getDirHandle(path)
        self._rig_name = None
        self._primary_path = None
        self._archive_path = None
        self._backup_path = None

        # reassign path based on order of most likely to be updated
        for path_typ in ['primary', 'archive', 'nas', 'backup']:
            path = getattr(self, path_typ + '_path')
            if path is not None and os.path.exists(path):
                self._site_path = path
                self.site_dh = getDirHandle(path)
                # reset values loaded while determining path
                self._expt_info = None
                self._slice_info = None
                self._site_info = None
                break
Example #7
def all_slices():
    """Return a dict mapping {slice_timestamp: path} for all known slices.
    
    This is only generated once per running process; set _all_slices = None
    to force the list to be regenerated.
    """
    global _all_slices
    if _all_slices is not None:
        return _all_slices
        
    # Speed things up by caching this list with a 4 hour timeout
    cachefile = os.path.join(config.cache_path, 'all_slices.pkl')
    if os.path.exists(cachefile):
        age = time.time() - os.stat(cachefile).st_mtime
        if age < 4 * 3600:
            print("Loaded slice timestamps from cache (%0.1f hours old)" % (age/3600.))
            return pickle.load(open(cachefile, 'rb'))
    
    #slice_dirs = sorted(glob.glob(os.path.join(config.synphys_data, '*', 'slice_*')))
    expt_csv = config.experiment_csv
    csv_entries = []
    with open(expt_csv, 'r') as csv_file:
        reader = csv.DictReader(csv_file)
        for row in reader:
            csv_entries.append(row)

    slice_dirs = sorted([os.path.split(os.path.join(config.synphys_data, exp['site_path']))[0] for exp in csv_entries])

    _all_slices = OrderedDict()
    for path in slice_dirs:
        dh = getDirHandle(path)
        ts = dh.info().get('__timestamp__')
        if ts is None:
            # no timestamp available; record a placeholder entry that is
            # handled specially when DB entries are created
            _all_slices['%.3f' % 0.0] = 'place_holder'
            continue
        # convert the timestamp to a string key with exactly 3 decimal places
        ts = '%0.3f' % ts
        _all_slices[ts] = path
        
    try:
        # write atomically: dump to a temp file, then rename into place
        tmpfile = cachefile + '.tmp'
        pickle.dump(_all_slices, open(tmpfile, 'wb'))
        os.rename(tmpfile, cachefile)
    except Exception:
        if os.path.exists(tmpfile):
            os.remove(tmpfile)
    
    return _all_slices
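
# A usage sketch for all_slices(): keys are timestamp strings with exactly
# three decimal places, so lookups must use the same format (values shown
# are hypothetical):
slices = all_slices()
for ts, path in list(slices.items())[:3]:
    print(ts, '->', path)   # e.g. '1502913679.283' -> '.../2017.10.01_000/slice_000'
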
Example #8
    def loadClicked(self):
        try:
            startDir = self.manager.getCurrentDir()
        except Exception:
            startDir = self.manager.getBaseDir()
        dirname = Qt.QFileDialog.getExistingDirectory(self.win, "Open Record",
                                                      startDir.name())
        if dirname == '':
            return
        self.recordDir = getDirHandle(dirname)
        self.recordWritable = False
        self.updateFileLabel()
        self.clearChannels()

        for dev in self.recordDir.ls():
            w = self.addChannel(dev, mode=None, recordDir=self.recordDir)

def sync_experiment(site_dir):
    """Synchronize all files for an experiment to the server.

    Argument must be the path of an experiment _site_ folder. This will also cause
    synchronization for the parent (slice) and grandparent (day) folders to ensure
    that all slice images and metadata are copied. Sibling site and slice folders
    will _not_ be copied.

    Return a list of changes made.
    """
    site_dh = getDirHandle(site_dir)
    changes = []
    slice_dh = site_dh.parent()
    expt_dh = slice_dh.parent()

    now = time.strftime('%Y-%m-%d_%H:%M:%S')
    log("========== %s : Sync %s to server" % (now, site_dh.name()))
    skipped = 0

    try:
        # Decide how the top-level directory will be named on the remote server
        # (it may already be there from a previous slice/site, or the current
        # name may already be taken by another rig.)
        server_expt_path = os.path.join(config.synphys_data,
                                        get_server_path(expt_dh))

        log("    using server path: %s" % server_expt_path)
        skipped += _sync_paths(expt_dh.name(), server_expt_path, changes)

        # Copy slice files if needed
        server_slice_path = os.path.join(server_expt_path,
                                         slice_dh.shortName())
        skipped += _sync_paths(slice_dh.name(), server_slice_path, changes)

        # Copy site files if needed
        server_site_path = os.path.join(server_slice_path, site_dh.shortName())
        skipped += _sync_paths(site_dh.name(), server_site_path, changes)

        log("    Done; skipped %d files." % skipped)

    except Exception:
        err = traceback.format_exc()
        changes.append(('error', site_dh.name(), err))
        log(err)

    return changes
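
# A usage sketch for sync_experiment(); the site path is hypothetical.
# Error entries in the change list are ('error', name, traceback) tuples,
# as appended in the except block above:
changes = sync_experiment('/data/rig1/2017.10.01_000/slice_000/site_000')
for change in changes:
    if change[0] == 'error':
        print('sync failed:', change[1])
    else:
        print('changed:', change)
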
Example #10
    def poll(self):
        # Find all available site paths across all data sources
        count = 0
        path = self.search_path

        self.update.emit(path, "Updating...")
        root_dh = getDirHandle(path)

        # iterate over all expt sites in this path
        for day_name in sorted(os.listdir(root_dh.name()), reverse=True):
            for expt_path in glob.iglob(
                    os.path.join(root_dh.name(), day_name, 'slice_*',
                                 'site_*')):
                if self._stop or not self.enable_polling:
                    return

                try:
                    expt = ExperimentMetadata(path=expt_path)
                    ts = expt.timestamp
                except Exception:
                    print('Error loading %s, ignoring and moving on...' %
                          expt_path)
                    sys.excepthook(*sys.exc_info())
                    continue
                # Couldn't get timestamp; show an error message
                if ts is None:
                    print("Error getting timestamp for %s" % expt)
                    continue

                with self.known_expts_lock:
                    now = time.time()
                    if ts in self.known_expts:
                        expt, last_update = self.known_expts[ts]
                        if now - last_update < self.interval:
                            # We've already seen this expt recently; skip
                            continue
                    self.known_expts[ts] = (expt, now)

                # Add this expt to the queue to be checked
                self.expt_queue.put((-ts, expt))  # negated timestamp: newest first

                count += 1
                if self.limit > 0 and count >= self.limit:
                    return
        self.update.emit(path, "Finished")
Example #12
    def poll(self):
        self.session = database.Session()
        expts = {}

        print("loading site paths..")
        #site_paths = glob.glob(os.path.join(config.synphys_data, '*', 'slice_*', 'site_*'))

        root_dh = getDirHandle(config.synphys_data)

        print(root_dh.name())
        for site_dh in self.list_expts(root_dh):
            expt = ExperimentMetadata(nas_path=site_dh.name())
            if expt.timestamp in expts:
                continue
            expts[expt.timestamp] = expt
            self.check(expt)
            if self.limit > 0 and len(expts) > self.limit:
                break

def generate_expt_path_cache():
    global _expt_path_cache
    _expt_path_cache = {}
    root = getDirHandle(config.synphys_data)
    for f in root.ls():
        if 'recycle' in f.lower():
            continue
        dh = root[f]
        if not dh.isDir():
            continue
        try:
            acq_timestamp = dh.info()['__timestamp__']
        except KeyError:
            print("NO TIMESTAMP:", dh.name())
            sys.exit(-1)
        if acq_timestamp in _expt_path_cache:
            raise Exception("timestamp %s appears twice in synphys data!!" %
                            acq_timestamp)
        _expt_path_cache[acq_timestamp] = dh.name(relativeTo=root)
    write_expt_path_cache()

    def poll(self):
        # Find all available site paths across all data sources
        count = 0
        path = self.search_path

        self.update.emit(path, "Updating...")
        root_dh = getDirHandle(path)

        # iterate over all expt sites in this path
        for day_name in sorted(os.listdir(root_dh.name()), reverse=True):
            for expt_path in glob.iglob(os.path.join(root_dh.name(), day_name, 'slice_*', 'site_*')):
                if self._stop or not self.enable_polling:
                    return

                try:
                    expt = ExperimentMetadata(path=expt_path)
                    ts = expt.timestamp
                except Exception:
                    print('Error loading %s, ignoring and moving on...' % expt_path)
                    continue
                # Couldn't get timestamp; show an error message
                if ts is None:
                    print("Error getting timestamp for %s" % expt)
                    continue

                with self.known_expts_lock:
                    now = time.time()
                    if ts in self.known_expts:
                        expt, last_update = self.known_expts[ts]
                        if now - last_update < self.interval:
                            # We've already seen this expt recently; skip
                            continue
                    self.known_expts[ts] = (expt, now)

                # Add this expt to the queue to be checked
                self.expt_queue.put((-ts, expt))

                count += 1
                if self.limit > 0 and count >= self.limit:
                    return
        self.update.emit(path, "Finished")
Example #15
def all_slices():
    """Return a dict mapping {slice_timestamp: path} for all known slices.
    
    This is only generated once per running process; set _all_slices = None
    to force the list to be regenerated.
    """
    global _all_slices
    if _all_slices is not None:
        return _all_slices

    # Speed things up by caching this list with a 4 hour timeout
    cachefile = os.path.join(config.cache_path, 'all_slices.pkl')
    if os.path.exists(cachefile):
        age = time.time() - os.stat(cachefile).st_mtime
        if age < 4 * 3600:
            print("Loaded slice timestamps from cache (%0.1f hours old)" %
                  (age / 3600.))
            return pickle.load(open(cachefile, 'rb'))

    slice_dirs = sorted(
        glob.glob(os.path.join(config.synphys_data, '*', 'slice_*')))

    _all_slices = OrderedDict()
    for path in slice_dirs:
        dh = getDirHandle(path)
        ts = dh.info().get('__timestamp__')
        if ts is None:
            print("MISSING TIMESTAMP: %s" % path)
            continue
        _all_slices["%0.3f" % ts] = path

    try:
        tmpfile = cachefile + '.tmp'
        pickle.dump(_all_slices, open(tmpfile, 'wb'))
        os.rename(tmpfile, cachefile)
    except Exception:
        if os.path.exists(tmpfile):
            os.remove(tmpfile)

    return _all_slices

    def poll(self):
        print("loading site paths..")
        #site_paths = glob.glob(os.path.join(config.synphys_data, '*', 'slice_*', 'site_*'))

        root_dh = getDirHandle(config.synphys_data)

        print(root_dh.name())
        for expt in root_dh.ls():
            expt_dh = root_dh[expt]
            print(expt_dh.name())
            if not expt_dh.isDir():
                continue
            for slice_name in expt_dh.ls():
                slice_dh = expt_dh[slice_name]
                if not slice_dh.isDir():
                    continue
                print(slice_dh.name())
                for site_name in slice_dh.ls():
                    site_dh = slice_dh[site_name]
                    if not site_dh.isDir():
                        continue
                    self.check(site_dh)

def all_slices():
    """Return a dict mapping {slice_timestamp: path} for all known slices.
    
    This is only generated once per running process; set _all_slices = None
    to force the list to be regenerated.
    """
    global _all_slices
    if _all_slices is not None:
        return _all_slices
        
    # Speed things up by caching this list with a 4 hour timeout
    cachefile = os.path.join(config.cache_path, 'all_slices.pkl')
    if os.path.exists(cachefile):
        age = time.time() - os.stat(cachefile).st_mtime
        if age < 4 * 3600:
            print("Loaded slice timestamps from cache (%0.1f hours old)" % (age/3600.))
            return pickle.load(open(cachefile, 'rb'))
    
    slice_dirs = sorted(glob.glob(os.path.join(config.synphys_data, '*', 'slice_*')))
    _all_slices = OrderedDict()
    for path in slice_dirs:
        dh = getDirHandle(path)
        ts = dh.info().get('__timestamp__')
        if ts is None:
            print("MISSING TIMESTAMP: %s" % path)
            continue
        _all_slices[ts] = path
        
    try:
        tmpfile = cachefile + '.tmp'
        pickle.dump(_all_slices, open(tmpfile, 'wb'))
        os.rename(tmpfile, cachefile)
    except Exception:
        if os.path.exists(tmpfile):
            os.remove(tmpfile)
    
    return _all_slices

def get_experiment_server_path(dh):
    server_path = config.synphys_data
    acq_timestamp = dh.info()['__timestamp__']

    # First check the cache
    cache = experiment_path_cache()
    if acq_timestamp in cache:
        return os.path.join(server_path, cache[acq_timestamp])

    # We have not already submitted a site from this experiment folder;
    # look for a suitable new directory name on the server
    expt_base_name = dh.shortName().split('_')[0]
    expt_dirs = set(os.listdir(server_path))
    i = 0
    while True:
        expt_name = expt_base_name + '_%03d' % i
        if expt_name not in expt_dirs:
            break
        i += 1
    server_expt_path = os.path.join(server_path, expt_name)
    assert not os.path.exists(server_expt_path)

    os.mkdir(server_expt_path)
    try:
        dh = getDirHandle(server_expt_path)
        # temporarily mark with timestamp; should be overwritten later.
        dh.setInfo(__timestamp__=acq_timestamp)
    except Exception:
        if os.path.exists(server_expt_path):
            shutil.rmtree(server_expt_path)
        raise

    cache[acq_timestamp] = expt_name
    write_expt_path_cache()

    return server_expt_path
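
# The probing loop above picks the first free _NNN suffix on the server; a
# self-contained sketch of just that naming rule (listing hypothetical):
existing = {'2017.10.01_000', '2017.10.01_001'}
expt_base_name = '2017.10.01'
i = 0
while True:
    expt_name = expt_base_name + '_%03d' % i
    if expt_name not in existing:
        break
    i += 1
print(expt_name)   # -> '2017.10.01_002'
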


root = sys.argv[1]
parser = argparse.ArgumentParser()
parser.add_argument('--set-data',
                    action='store_true',
                    default=False,
                    dest='set-data')
args = vars(parser.parse_args(sys.argv[2:]))
set_data = args['set-data']

# find all subject folders that contain at least one site folder
sites = glob.glob(os.path.join(root, '*', 'slice_*', 'site_*'))
checked_days = set()
checked_slices = set()

for path in sites:
    site_dh = getDirHandle(path)
    slice_dh = site_dh.parent()
    day_dh = slice_dh.parent()
    sub_id = day_dh.info().get('animal_ID', None)
    expt_date = datetime.datetime.fromtimestamp(
        day_dh.info()['__timestamp__']).date()
    species = day_dh.info().get('species', None)
    if species != 'human':
        genotype = species
    if sub_id is not None and species is None:
        try:
            species = day_dh.info().get('LIMS_specimen_info')['organism']
            genotype = day_dh.info().get('LIMS_specimen_info')['genotype']
        except TypeError:
            try:
                species = day_dh.info().get('LIMS_donor_info')['organism']
Example #22
    def ready_jobs(self):
        """Return an ordered dict of all jobs that are ready to be processed (all dependencies are present)
        and the dates that dependencies were created.
        """

        slice_module = self.pipeline.get_module('opto_slice')
        finished_slices = slice_module.finished_jobs()

        # cache = synphys_cache.get_cache()
        # all_expts = cache.list_experiments()
        db = self.database
        session = db.session()
        slices = session.query(db.Slice.storage_path).all()
        slice_paths = [s[0] for s in slices]

        #ymls = []
        #for rec in slices:
        #    path = rec[0]
        #    ymls.extend(glob.glob(os.path.join(config.synphys_data, path, 'site_*', 'pipettes.yml')))
        expts = read_expt_csvs()

        n_errors = {}
        n_no_slice = []
        ready = OrderedDict()
        print("checking for ready expts....")
        for i, expt in enumerate(expts['expt_list']):
            #print("Checking experiment %i/%i"%(i, len(expts['expt_list'])))
            site_path = os.path.join(config.synphys_data, expt['site_path'])
            slice_path = getDirHandle(os.path.split(site_path)[0]).name(
                relativeTo=getDirHandle(config.synphys_data))
            #print slice_paths
            #if not slice_path in slice_paths:
            #    #print("Did not find slice path for %s"%slice_path)
            #    n_no_slice += 1
            #    continue
            try:
                if expt['site_path'] == '':
                    cnx_json = os.path.join(config.connections_dir,
                                            expt['experiment'])
                    ex = AI_Experiment(
                        loader=OptoExperimentLoader(load_file=cnx_json),
                        meta_info=expt)
                else:
                    ex = AI_Experiment(loader=OptoExperimentLoader(
                        site_path=site_path))

                raw_data_mtime = ex.last_modification_time

                slice_ts = ex.info.get('slice_info', {}).get('__timestamp__')
                if slice_ts is None:
                    slice_ts = 0.0
                slice_mtime, slice_success = finished_slices.get(
                    '%.3f' % slice_ts, (None, None))
                #print('found expt for path:', site_path)
            except Exception as exc:
                # record the error and continue scanning remaining experiments
                n_errors[expt['experiment']] = exc
                continue
            if slice_mtime is None or slice_success is False:
                #    slice_mtime = 0
                n_no_slice.append(expt['experiment'])
                continue

            ready[ex.ext_id] = {
                'dep_time': max(raw_data_mtime, slice_mtime),
                'meta': {
                    'source': site_path
                }
            }

        print(
            "Found %d experiments; %d are able to be processed, %d were skipped due to errors, %d were skipped due to missing or failed slice entries."
            % (len(expts['expt_list']), len(ready), len(n_errors),
               len(n_no_slice)))
        if len(n_errors) > 0 or len(n_no_slice) > 0:
            print("-------- skipped experiments: ----------")
            for e, exc in n_errors.items():
                print('     %s: Error - %s' % (e.split('_conn')[0], exc))
            for e in n_no_slice:
                print('     %s: skipped due to problem with slice' %
                      e.split('_conn')[0])
        return ready
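
# For reference, ready_jobs() returns entries shaped like this
# (values hypothetical):
#
#     {'1502913679.283': {'dep_time': 1502999999.0,
#                         'meta': {'source': '.../slice_000/site_000'}}}
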
Example #23
    @classmethod
    def create_db_entries(cls, job, session):
        job_id = job['job_id']
        db = job['database']

        slices = all_slices()
        path = slices[job_id]
        dh = getDirHandle(path)
        info = dh.info()
        parent_info = dh.parent().info()
        
        # pull some metadata from LIMS
        sid = info['specimen_ID'].strip()
        limsdata = lims.specimen_info(sid)

        quality = info.get('slice quality', None)
        try:
            quality = int(quality)
        except Exception:
            quality = None

        # Interpret slice time
        slice_time = parent_info.get('time_of_dissection', None)
        if slice_time == '':
            slice_time = None
        if slice_time is not None:
            m = re.match(r'((20\d\d)-(\d{1,2})-(\d{1,2}) )?(\d+):(\d+)', slice_time.strip())
            if m is not None:
                _, year, mon, day, hh, mm = m.groups()
                if year is None:
                    date = datetime.fromtimestamp(dh.parent().info()['__timestamp__'])
                    slice_time = datetime(date.year, date.month, date.day, int(hh), int(mm))
                else:
                    slice_time = datetime(int(year), int(mon), int(day), int(hh), int(mm))

        # construct the full genotype string
        genotype = limsdata['genotype'] or ''
        # check both the day (parent) and slice metadata for virus injections
        for meta in (parent_info, info):
            inj = meta.get('injections')
            if inj in (None, ''):
                continue
            if inj not in constants.INJECTIONS:
                raise KeyError("Injection %r is unknown in constants.INJECTIONS" % inj)
            genotype = ';'.join(genotype.split(';') + [constants.INJECTIONS[inj]])

        fields = {
            'ext_id': job_id,
            'acq_timestamp': info['__timestamp__'],
            'species': limsdata['organism'],
            'date_of_birth': limsdata['date_of_birth'],
            'age': limsdata['age'],
            'sex': limsdata['sex'],
            'genotype': genotype,
            'orientation': limsdata['plane_of_section'],
            'surface': limsdata['exposed_surface'],
            'hemisphere': limsdata['hemisphere'],
            'quality': quality,
            'slice_time': slice_time,
            'slice_conditions': {},
            'lims_specimen_name': sid,
            'storage_path': dh.name(relativeTo=dh.parent().parent()),
        }

        sl = db.Slice(**fields)
        session.add(sl)
Example #24
    @classmethod
    def create_db_entries(cls, job, session):
        job_id = job['job_id']
        db = job['database']

        slices = all_slices()
        path = slices[job_id]

        if path == 'place_holder':
            sl = db.Slice(storage_path='place_holder', acq_timestamp=0.0)
            session.add(sl)
            return

        dh = getDirHandle(path)
        info = dh.info()
        parent_info = dh.parent().info()
        
        # pull some metadata from LIMS
        #sid = self.find_specimen_name(dh)
        sids = data_model.find_lims_specimen_ids(dh)
        #print('sids:', sids)
        if len(sids) == 0:
            limsdata = {}
        elif len(sids) == 1:
            limsdata = lims.specimen_info(specimen_id=sids[0])
        elif len(sids) > 1:
            data = []
            for i in sids:
                data.append(lims.specimen_info(specimen_id=i))
            limsdata = {}
            for key in ['organism', 'date_of_birth', 'age', 'sex', 'plane_of_section', 'exposed_surface', 'hemisphere', 'specimen_name', 'genotype']:
                vals = list(set([d[key] for d in data]))
                if len(vals) == 1:
                    limsdata[key] = vals[0]


        quality = info.get('slice quality', None)
        try:
            quality = int(quality)
        except Exception:
            quality = None

        # Interpret slice time
        slice_time = parent_info.get('time_of_dissection', None)
        if slice_time == '':
            slice_time = None
        if slice_time is not None:
            m = re.match(r'((20\d\d)-(\d{1,2})-(\d{1,2}) )?(\d+):(\d+)', slice_time.strip())
            if m is not None:
                _, year, mon, day, hh, mm = m.groups()
                if year is None:
                    date = datetime.fromtimestamp(dh.parent().info()['__timestamp__'])
                    slice_time = datetime(date.year, date.month, date.day, int(hh), int(mm))
                else:
                    slice_time = datetime(int(year), int(mon), int(day), int(hh), int(mm))

        # construct the full genotype string
        genotype = limsdata.get('genotype') or ''
        # check both the day (parent) and slice metadata for virus injections
        for meta in (parent_info, info):
            inj = meta.get('injections')
            if inj in (None, ''):
                continue
            if inj not in constants.INJECTIONS:
                raise KeyError("Injection %r is unknown in constants.INJECTIONS" % inj)
            genotype = genotype + ';' + constants.INJECTIONS[inj]


        fields = {
            'ext_id': '%.3f' % info['__timestamp__'],
            'acq_timestamp': info['__timestamp__'],
            'species': limsdata.get('organism'),
            'date_of_birth': limsdata.get('date_of_birth'),
            'age': limsdata.get('age'),
            'sex': limsdata.get('sex'),
            'genotype': genotype,
            'orientation': limsdata.get('plane_of_section'),
            'surface': limsdata.get('exposed_surface'),
            'hemisphere': limsdata.get('hemisphere'),
            'quality': quality,
            'slice_time': slice_time,
            'slice_conditions': {},
            'lims_specimen_name': limsdata.get('specimen_name'),
            'storage_path': dh.name(relativeTo=getDirHandle(config.synphys_data)),
        }

        sl = db.Slice(**fields)
        session.add(sl)
        session.commit()
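
# The multi-specimen branch above keeps a LIMS field only when every
# specimen agrees on its value; a self-contained sketch of that consensus
# merge (data hypothetical):
data = [
    {'organism': 'mouse', 'sex': 'F'},
    {'organism': 'mouse', 'sex': 'M'},
]
limsdata = {}
for key in ['organism', 'sex']:
    vals = list(set(d[key] for d in data))
    if len(vals) == 1:
        limsdata[key] = vals[0]
print(limsdata)   # -> {'organism': 'mouse'}; 'sex' dropped (no consensus)
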
Example #25
    def __init__(self, slice_dir):
        self.slice_dir = slice_dir
        self.dh = getDirHandle(self.slice_dir)
        self._fields = None
Example #26
import os, datetime, re
import config
import database
from acq4.util.DataManager import getDirHandle

for path in config.raw_data_paths:
    for fname in os.listdir(path):
        fname = os.path.join(path, fname)
        if not os.path.isdir(fname):
            continue

        dh = getDirHandle(fname)
        if not dh.isManaged():
            continue

        for sl in dh.ls():
            if not sl.startswith('slice_'):
                continue
            slice_dh = dh[sl]
            if not slice_dh.isDir():
                continue

            for site in slice_dh.ls():
                if not site.startswith('site_'):
                    continue
                site_dh = slice_dh[site]
                if not site_dh.isDir():
                    continue

                ts = site_dh.info()['__timestamp__']
                date = datetime.datetime.fromtimestamp(ts)