def parse_infile_names(self):
    """Try to get subject, and possibly group, names from self.infiles.

    Fills self.snames and self.gnames, if possible:

    1. get SID
       - if files look like out.ss_review.SID.txt, that is a good start
       - else, look for the varying part of the filenames
    2. get GID
       - replace each SID in its infile name with 'SUBJ' and look for a
         varying group name in what is left

    self.snames is assigned as soon as a usable SID list is found;
    self.gnames is assigned only if a usable GID list is found as well.
    Nothing is returned.  Most progress messages are shown only when
    self.verb > 1.
    """
    rv, slist = UTIL.list_minus_pref_suf(self.infiles,
                                         'out.ss_review.', '.txt')
    if rv < 0:
        return
    if rv > 0:
        # the prefix/suffix form did not apply: fall back to the varying
        # part of the names (directories stripped)
        if self.verb > 1:
            print('++ trying to get SID from glob form')
        slist = UTIL.list_minus_glob_form(self.infiles, strip='dir')
    else:
        if self.verb > 1:
            print("++ have SIDs from 'out.ss_review' form")

    if len(slist) == 0:
        if self.verb > 1:
            print("-- empty SID list")
        return

    # make sure names are unique and not empty
    if not UTIL.vals_are_unique(slist):
        if self.verb > 1:
            print('-- SIDs not detected: not unique')
        return

    # slist is known to be non-empty here, so min() is safe
    if min(len(sid) for sid in slist) < 1:
        if self.verb > 1:
            print('-- SIDs not detected: some would be empty')
        return

    # we have a subject list
    self.snames = slist

    # now go for GID, start by replacing SIDs in infiles
    newfiles = [fname.replace(slist[ind], 'SUBJ')
                for ind, fname in enumerate(self.infiles)]

    # identical names once the SIDs are masked means nothing is left to
    # tell groups apart (this message is not gated on self.verb)
    if UTIL.vals_are_constant(newfiles):
        print('-- no groups detected from filenames')
        return

    # okay, try to make a group list
    glist = UTIL.list_minus_glob_form(newfiles)

    # cannot have dirs in result
    for gid in glist:
        if gid.find('/') >= 0:
            if self.verb > 1:
                print('-- no GIDs, dirs vary in multiple places')
            return

    if min(len(gid) for gid in glist) < 1:
        if self.verb > 1:
            print('-- GIDs not detected: some would be empty')
        return

    if self.verb > 1:
        print("++ have GIDs from infiles")
    self.gnames = glist
def set_ids_from_dsets(self, prefix='', suffix='', hpad=0, tpad=0, dpre=0):
    """Derive each subject's sid from the varying part of its dset name.

    hpad, tpad : passed to UTIL.list_minus_glob_form to expand the varying
                 part into the head or tail of the names (negative values
                 are rejected)
    prefix, suffix : text placed before and after every resulting ID
    dpre : passed to UTIL.list_minus_glob_form as keep_dent_pre

    return 0 on success, 1 on error
    """
    if min(hpad, tpad) < 0:
        print('** set_ids_from_dsets: will not apply negative padding')
        return 1

    subjects = self.subjects

    # first choice: the file names alone, with any leading path removed
    names = [subj.dset.rpartition('/')[2] for subj in subjects]
    if UTIL.vals_are_constant(names):
        # file names cannot tell subjects apart, so use the full paths
        print('** constant dataset names (%s)' % names[0])
        print(' trying directories...')
        names = [subj.dset for subj in subjects]

    labels = UTIL.list_minus_glob_form(names, hpad, tpad, keep_dent_pre=dpre)

    # a label that still spans a directory boundary is cut down to the
    # text after its last '/'; the list is updated in place
    for pos, label in enumerate(labels):
        if '/' not in label:
            continue
        tail = label.rpartition('/')[2]
        labels[pos] = tail
        if not tail:
            print('** failed to extract subject IDs from directory list')
            print(' (directories do not vary at single level)')
            return 1

    if len(labels) != len(subjects):
        message = ('** failed to set SIDs from dset names\n'
                   ' dsets = %s\n'
                   ' slist = %s')
        print(message % (names, labels))
        return 1

    if not UTIL.vals_are_unique(labels):
        print('** cannot set IDs from dsets, labels not unique: %s' % labels)
        print('-- labels come from dsets: %s' % names)
        return 1

    # lengths were checked above, so pairing subjects with labels is safe
    for subj, label in zip(subjects, labels):
        subj.sid = '%s%s%s' % (prefix, label, suffix)

    return 0
def set_ids_from_dsets(self, prefix='', suffix='', hpad=0, tpad=0, dpre=0):
    """Use the varying part of the dataset names for subject IDs.

    If hpad > 0 or tpad > 0, expand into the head or tail of the dsets.
    If prefix or suffix is passed, apply them.
    dpre is handed to UTIL.list_minus_glob_form as keep_dent_pre.

    On success, every entry of self.subjects gets its sid attribute set.

    return 0 on success, 1 on error
    """
    if hpad < 0 or tpad < 0:
        print('** set_ids_from_dsets: will not apply negative padding')
        return 1

    # try filenames without paths, first
    dlist = [s.dset.split('/')[-1] for s in self.subjects]
    if UTIL.vals_are_constant(dlist):
        print('** constant dataset names (%s)' % dlist[0])
        print(' trying directories...')
        dlist = [s.dset for s in self.subjects]

    slist = UTIL.list_minus_glob_form(dlist, hpad, tpad, keep_dent_pre=dpre)

    # in the case of directories, check for success
    # (maybe we can try to skip past them, that might be okay)
    for index, label in enumerate(slist):
        if '/' in label:
            # keep only what follows the last '/', updating slist in place
            label = label[label.rfind('/') + 1:]
            slist[index] = label
            if len(label) < 1:
                print('** failed to extract subject IDs from directory list')
                print(' (directories do not vary at single level)')
                return 1

    if len(slist) != len(self.subjects):
        print('** failed to set SIDs from dset names\n'
              ' dsets = %s\n'
              ' slist = %s' % (dlist, slist))
        return 1

    if not UTIL.vals_are_unique(slist):
        print('** cannot set IDs from dsets, labels not unique: %s' % slist)
        print('-- labels come from dsets: %s' % dlist)
        return 1

    for ind, subj in enumerate(self.subjects):
        subj.sid = '%s%s%s' % (prefix, slist[ind], suffix)

    return 0