コード例 #1
0
    def _make_datasets(self):
        """Create a Dataset for every complete file group in the input directory.

        Entries directly under ``PathResolver.input_path()`` are grouped by
        the key that ``self._extract_external_name`` returns for their file
        name.  A group yields a ``Dataset`` only when its key is truthy, it
        holds at least three files, and it contains a file matching each of
        the ``DataManager`` left-reads, right-reads and contigs file-name
        patterns.

        Returns:
            ``self.datasets``, with the newly created datasets appended.
        """
        NameConverter.load()

        input_path = PathResolver.input_path()
        # Sorted so that, when several files of a group match the same
        # pattern, the one that wins (the last) is deterministic.
        file_names = sorted(
            path.split('/')[-1] for path in glob.glob(input_path + '/*')
        )

        # Accumulate per key rather than relying on itertools.groupby:
        # groupby only merges *adjacent* items, and the names are sorted
        # lexicographically, not by key, so the files of one key could be
        # split into several runs with later runs overwriting earlier ones.
        groups = {}
        for file_name in file_names:
            key = self._extract_external_name(file_name)
            groups.setdefault(key, []).append(file_name)

        # Checked in this order for each file; the first matching pattern
        # decides the role.
        role_patterns = (
            ('reads_1', DataManager.LEFT_READS_FNAME_REGEXP),
            ('reads_2', DataManager.RIGHT_READS_FNAME_REGEXP),
            ('contigs', DataManager.CONTIGS_FNAME_REGEXP),
        )

        # .items() (not .iteritems()) so this runs on Python 2 and 3 alike.
        for group, members in groups.items():
            if not group or len(members) < 3:
                continue

            file_group = {'reads_1': None, 'reads_2': None, 'contigs': None}

            for member in members:
                for role, pattern in role_patterns:
                    if re.match(pattern, member):
                        file_group[role] = member
                        break

            # Skip groups that are missing any of the three required files.
            if None in file_group.values():
                continue

            ext_name = self._extract_external_name(file_group['contigs'])
            self.datasets.append(Dataset(ext_name))

        return self.datasets
コード例 #2
0
 def _find_dataset_file(self, file_name, extensions):
     """Return the first input-directory path matching ``file_name``.

     For each extension in ``extensions``, in order, the pattern
     ``<input_path>/<file_name>.<ext>`` is globbed and all hits are
     collected into one list.

     Returns:
         The first collected path, or ``None`` when nothing matched.
     """
     base_dir = PathResolver.input_path()
     matches = [
         hit
         for ext in extensions
         for hit in glob.glob('%s/%s.%s' % (base_dir, file_name, ext))
     ]
     return matches[0] if matches else None