def _make_datasets(self):
    """Scan the input directory and build a Dataset per complete file group.

    Lists every file under the resolved input path, groups the names by
    their external name (via ``self._extract_external_name``), and for each
    group that contains all three expected files — left reads, right reads,
    and contigs (matched against the ``DataManager`` filename regexps) —
    appends a new ``Dataset`` to ``self.datasets``.

    Returns:
        The (mutated) ``self.datasets`` list.
    """
    NameConverter.load()
    input_path = PathResolver.input_path()
    # Keep only the basename of each path; sorting is required so that
    # itertools.groupby below sees equal-keyed names adjacently.
    # NOTE(review): assumes lexical sort keeps same-external-name files
    # contiguous — confirm against the naming scheme.
    file_names = sorted(p.split('/')[-1] for p in glob.glob(input_path + '/*'))
    groups = {}
    for gid, members in itertools.groupby(file_names, self._extract_external_name):
        groups[gid] = list(members)
    # .items() instead of Python-2-only .iteritems(); also avoid shadowing
    # the file-name list with the per-group loop variable.
    for group, group_files in groups.items():
        # A usable dataset needs at least the three expected files.
        if not group or len(group_files) < 3:
            continue
        file_group = {'reads_1': None, 'reads_2': None, 'contigs': None}
        for f in group_files:
            if re.match(DataManager.LEFT_READS_FNAME_REGEXP, f):
                file_group['reads_1'] = f
            elif re.match(DataManager.RIGHT_READS_FNAME_REGEXP, f):
                file_group['reads_2'] = f
            elif re.match(DataManager.CONTIGS_FNAME_REGEXP, f):
                file_group['contigs'] = f
        # Only complete groups (all three roles filled) become datasets.
        if None not in file_group.values():
            ext_name = self._extract_external_name(file_group['contigs'])
            dataset = Dataset(ext_name)
            self.datasets.append(dataset)
    return self.datasets
def _find_dataset_file(self, file_name, extensions):
    """Locate an input file named *file_name* with one of *extensions*.

    Globs ``<input_path>/<file_name>.<ext>`` for each extension, in order,
    and returns the first match found; returns ``None`` when nothing matches.
    """
    base = PathResolver.input_path()
    matches = []
    for ext in extensions:
        matches.extend(glob.glob('%s/%s.%s' % (base, file_name, ext)))
    if matches:
        return matches[0]
    return None