Esempio n. 1
0
    def __init__(self, external_name):
        """Initialise the object for the given ``external_name``.

        Creates the directory returned by
        ``PathResolver.datasets_output_path()`` when it is not already a
        directory, stores the external name, derives the internal name via
        ``detect_internal_name()`` and registers the external name with
        ``NameConverter``.
        """
        output_dir_missing = not os.path.isdir(
            PathResolver.datasets_output_path())
        if output_dir_missing:
            os.makedirs(PathResolver.datasets_output_path())

        self.external_name = external_name
        self.internal_name = self.detect_internal_name()
        NameConverter.register(self.external_name)

        # Left unset here; nothing in the constructor computes it.
        self._contigs_count = None
Esempio n. 2
0
    def _make_datasets(self):
        """Append a ``Dataset`` for every complete file group in the input folder.

        The entries of ``PathResolver.input_path()`` are grouped by
        ``self._extract_external_name``. Groups with a falsy key or fewer
        than three files are skipped. A dataset is created only when the
        group contains a file matching each of the left-reads, right-reads
        and contigs patterns defined on ``DataManager``.

        Returns:
            ``self.datasets``, with the newly created datasets appended.
        """
        NameConverter.load()

        input_path = PathResolver.input_path()
        file_names = sorted(
            path.split('/')[-1] for path in glob.glob(input_path + '/*'))

        # Accumulate per key rather than using itertools.groupby: groupby
        # only merges *adjacent* items, and the list is sorted by file name,
        # not by the extracted key, so a key split into several runs would
        # otherwise keep only its last run.
        groups = {}
        for file_name in file_names:
            key = self._extract_external_name(file_name)
            groups.setdefault(key, []).append(file_name)

        for group, group_files in groups.items():
            if not group or len(group_files) < 3:
                continue

            file_group = {'reads_1': None, 'reads_2': None, 'contigs': None}

            for f in group_files:
                if re.match(DataManager.LEFT_READS_FNAME_REGEXP, f):
                    file_group['reads_1'] = f
                elif re.match(DataManager.RIGHT_READS_FNAME_REGEXP, f):
                    file_group['reads_2'] = f
                elif re.match(DataManager.CONTIGS_FNAME_REGEXP, f):
                    file_group['contigs'] = f

            # Only groups where all three roles were filled become datasets.
            if None not in file_group.values():
                ext_name = self._extract_external_name(file_group['contigs'])
                self.datasets.append(Dataset(ext_name))

        return self.datasets
Esempio n. 3
0
 def _find_dataset_file(self, file_name, extensions):
     """Return the first input-folder path matching ``file_name`` and an extension.

     For each entry of ``extensions`` (in order) the pattern
     ``<input path>/<file_name>.<ext>`` is globbed; the first path of the
     combined results is returned, or ``None`` when nothing matched.
     """
     input_dir = PathResolver.input_path()
     matches = []
     for ext in extensions:
         matches.extend(glob.glob('%s/%s.%s' % (input_dir, file_name, ext)))
     return matches[0] if matches else None
Esempio n. 4
0
    def perform(self):
        """Prepare the output folders, then run the three processing steps.

        Ensures that the database folder and the all-vs-all and one-vs-all
        output folders exist, then calls ``_make_databases``,
        ``_perform_blasts`` and ``_make_one_vs_all_files`` in that order.
        """
        PathResolver.assure_path_exists(self._db_folder_path())

        for folder in (PathResolver.ALL_VS_ALL_FOLDER,
                       PathResolver.ONE_VS_ALL_FOLDER):
            PathResolver.assure_path_exists(
                PathResolver.output_path_for(folder))

        self._make_databases()
        self._perform_blasts()
        self._make_one_vs_all_files()
Esempio n. 5
0
    def __init__(self, dataset):
        """Store ``dataset`` and make sure the mappings folders exist.

        Both the mappings folder and the database folder nested inside it
        are passed to ``PathResolver.assure_path_exists``.
        """
        self.dataset = dataset

        # Outer folder first, then the database folder inside it.
        for components in ((self.MAPPINGS_FOLDER,),
                           (self.MAPPINGS_FOLDER, self.DB_FOLDER)):
            PathResolver.assure_path_exists(
                PathResolver.output_path_for(*components))
Esempio n. 6
0
    def _make_one_vs_all_files(self):
        """Merge the all-vs-all result files into one file per left organism.

        Every ``*.<BLAST_RESULT_EXT>`` file in the all-vs-all folder is
        assigned to the organism id found before ``_VS_`` in its base name.
        For each organism the assigned files are concatenated into
        ``<org_id>.<BLAST_RESULT_EXT>`` inside the one-vs-all folder,
        replacing any file already there.

        The copy is done in Python instead of ``cat ... >> ...`` through a
        shell, so paths containing spaces or shell metacharacters work and
        I/O errors are raised instead of being silently ignored.
        """
        import shutil  # local import: only this method needs it

        all_vs_all_path = PathResolver.output_path_for(
            PathResolver.ALL_VS_ALL_FOLDER)
        pattern = '%s/*.%s' % (all_vs_all_path, self.BLAST_RESULT_EXT)

        results_by_org = {}
        for f_path in glob.glob(pattern):
            base_name = os.path.splitext(os.path.basename(f_path))[0]
            left_org_id = base_name.split('_VS_')[0]
            results_by_org.setdefault(left_org_id, []).append(f_path)

        for org_id, result_paths in results_by_org.items():
            org_file_name = org_id + '.' + self.BLAST_RESULT_EXT
            path = PathResolver.output_path_for(PathResolver.ONE_VS_ALL_FOLDER,
                                                org_file_name)

            # Opening with 'wb' truncates a previous merged file, which
            # covers the explicit remove-if-exists step.
            with open(path, 'wb') as merged:
                for f_path in result_paths:
                    with open(f_path, 'rb') as part:
                        shutil.copyfileobj(part, merged)
Esempio n. 7
0
    def _perform_blast(self, left_path, right_path):
        """Run ``blastn`` with ``left_path`` as query against the right database.

        The database path is ``self._db_folder_path(<right organism name>)``
        and the result is written to
        ``<left>_VS_<right>.<BLAST_RESULT_EXT>`` in the all-vs-all folder,
        where the organism names are the base names of the two paths
        without extension.

        The command is passed to ``subprocess.call`` as an argument list
        (no shell), so paths containing spaces or shell metacharacters are
        handed to ``blastn`` unchanged. The exit status is not checked.
        """
        right_org_name = os.path.splitext(os.path.basename(right_path))[0]
        left_org_name = os.path.splitext(os.path.basename(left_path))[0]
        db_path = self._db_folder_path(right_org_name)

        outfile_name = '%s_VS_%s.%s' % (left_org_name, right_org_name,
                                        self.BLAST_RESULT_EXT)
        output_path = PathResolver.output_path_for(
            PathResolver.ALL_VS_ALL_FOLDER, outfile_name)

        threads_cnt = Settings.winston.tools.blast.threads
        command = [
            'blastn',
            '-query', left_path,
            '-db', db_path,
            '-out', output_path,
            # Format 6 plus the column list must reach blastn as ONE argument.
            '-outfmt', '6 %s' % (self.COLUMNS,),
            '-num_threads', str(threads_cnt),
        ]

        subprocess.call(command)
Esempio n. 8
0
def main(args):
    """Align the two input sequences and log (optionally save) the results.

    Reads ``args.logging``, ``args.config``, ``args.input1``,
    ``args.input2`` and ``args.output``. Loads the config and both FASTA
    files, runs ``solve``, resolves the alignment paths and logs each
    alignment pair. When ``args.output`` is truthy the sequences, config,
    alignments and score matrix are passed to ``save_output``.

    Raises:
        ValueError: if ``max_sequence_length`` in the config is non-zero
            and the longer sequence exceeds it.
    """
    logging.basicConfig(level=logging.getLevelName(args.logging))
    logging.info('User args: %s' % pformat(args))

    required_keys = ['gap', 'same', 'diff', 'max_number_of_paths',
                     'max_sequence_length']
    config = load_config_from_json_file(args.config, required_keys)
    logging.info('Config is: \n%s' % pformat(config))

    seq1 = load_fasta_file(args.input1)
    seq2 = load_fasta_file(args.input2)

    # A limit of 0 disables the length check.
    length_limit_enabled = config['max_sequence_length'] != 0
    if length_limit_enabled and max(
            len(seq1), len(seq2)) > config['max_sequence_length']:
        raise ValueError('Sequence exceeded max_sequence_length ')

    score_matrix, nodes_mapping = solve(seq1, seq2, config['gap'],
                                        config['diff'], config['same'])

    logging.debug('Score matrix: \n%s' % pformat(score_matrix))
    logging.debug('Nodes mapping: (target_node): [(parent_node),...]\n%s' %
                  pformat(nodes_mapping))
    logging.info('Alignments score: %s' % score_matrix[len(seq1), len(seq2)])

    resolver = PathResolver(nodes_mapping)
    paths = resolver.resolve_paths(
        len(seq1), len(seq2), config['max_number_of_paths'])

    allignments = [get_allignments(path, seq1, seq2) for path in paths]

    for i, (allignment_1, allignment_2) in enumerate(allignments):
        logging.info('[A%04d] %s' % (i, allignment_1))
        logging.info('[A%04d] %s' % (i, allignment_2))

    if not args.output:
        return

    payload = {
        'seq1': seq1,
        'seq2': seq2,
        'config': config,
        'allignments': allignments,
        'score_matrix': score_matrix.tolist()
    }
    save_output(args.output, payload)
    logging.info('Saved output to %s' % args.output)
Esempio n. 9
0
    def load(file_path=None):
        """Fill ``TypesManager.types_dict`` from a CSV file; always returns ``True``.

        When ``file_path`` is falsy, ``PathResolver.pair_types_path()`` is
        used. If ``TypesManager.types_dict`` is already truthy the file is
        not read. Each CSV row contributes one entry keyed by
        ``TypesManager.detect_key(row[0], row[1])`` holding the float
        ``threshold`` (column 2) and the ``type`` (column 3).
        """
        if not file_path:
            file_path = PathResolver.pair_types_path()

        # Already populated: nothing to do.
        if TypesManager.types_dict:
            return True

        with open(file_path, 'r') as handle:
            parsed = {}

            for row in csv.reader(handle):
                key = TypesManager.detect_key(row[0], row[1])
                entry = {'threshold': float(row[2]), 'type': row[3]}
                parsed[key] = entry

        TypesManager.types_dict = parsed

        return True
Esempio n. 10
0
 def _get_output_path(self, input_path):
     """Return the datasets-output path for the file at ``input_path``.

     The base name of ``input_path`` is rewritten by replacing
     ``self.external_name`` with ``self.internal_name`` and the result is
     passed to ``PathResolver.datasets_output_path``.
     """
     file_name = os.path.basename(input_path)
     # NOTE(review): this line previously called ``.replace`` on
     # ``input_file_name``, a name never defined in this method (NameError
     # on every call). Swapping the external name for the internal one in
     # the base name is the presumed intent -- confirm against callers.
     file_name = file_name.replace(self.external_name, self.internal_name)
     return PathResolver.datasets_output_path(file_name)
 def _default_db_path():
     """Return the output path for ``DatabaseWorker.DEFAULT_DB_FILE_NAME``."""
     db_file_name = DatabaseWorker.DEFAULT_DB_FILE_NAME
     return PathResolver.output_path_for(db_file_name)
Esempio n. 12
0
 def pileup_output_path(self):
     """Return the output path of ``<external_name>_pileup.txt`` in the mappings folder."""
     components = (self.MAPPINGS_FOLDER,
                   '%s_pileup.txt' % self.dataset.external_name)
     return PathResolver.output_path_for(*components)
Esempio n. 13
0
 def reads_output_paths(self):
     """Return the datasets-output path for each path in ``reads_input_paths()``.

     Only the base name of each input path is kept; the result preserves
     the input order.
     """
     output_paths = []
     for input_path in self.reads_input_paths():
         base_name = os.path.basename(input_path)
         output_paths.append(PathResolver.datasets_output_path(base_name))
     return output_paths
Esempio n. 14
0
 def pair_types_path():
     """Return the output path for ``PathResolver.TYPES_FILENAME``."""
     types_file_name = PathResolver.TYPES_FILENAME
     return PathResolver.output_path_for(types_file_name)
Esempio n. 15
0
 def _db_folder_path(self, *inner_path):
     """Return the output path of the BLAST database folder.

     Any ``inner_path`` components are appended after
     ``PathResolver.BLAST_DB_FOLDER`` before the path is resolved.
     """
     components = (PathResolver.BLAST_DB_FOLDER,) + inner_path
     return PathResolver.output_path_for(*components)
Esempio n. 16
0
 def _contigs_files(self):
     """Return datasets-output entries matching ``DataManager.CONTIGS_FNAME_REGEXP``.

     The pattern is applied with ``re.match`` to each path exactly as
     ``glob`` returns it.
     """
     candidates = glob.glob(PathResolver.datasets_output_path() + '/*')
     matching = []
     for candidate in candidates:
         if re.match(DataManager.CONTIGS_FNAME_REGEXP, candidate):
             matching.append(candidate)
     return matching
Esempio n. 17
0
 def contigs_output_path(self):
     """Return the datasets-output path for the base name of ``contigs_input_path()``."""
     source_path = self.contigs_input_path()
     return PathResolver.datasets_output_path(os.path.basename(source_path))
Esempio n. 18
0
 def _dict_file_path():
     """Return the output path for ``NameConverter.DICT_FILENAME``."""
     dict_file_name = NameConverter.DICT_FILENAME
     return PathResolver.output_path_for(dict_file_name)
Esempio n. 19
0
 def test_path_resolving_from_top_left(self):
     """``resolve_paths(0, 0)`` on the fixture mapping returns exactly one path."""
     resolver = PathResolver(PathResolvingTest.get_nodes_mapping())
     resolved = resolver.resolve_paths(0, 0)
     self.assertEqual(len(resolved), 1)
Esempio n. 20
0
 def test_path_resolving_from_bottom_right(self):
     """``resolve_paths(4, 5)`` on the fixture mapping returns exactly two paths."""
     resolver = PathResolver(PathResolvingTest.get_nodes_mapping())
     resolved = resolver.resolve_paths(4, 5)
     self.assertEqual(len(resolved), 2)
Esempio n. 21
0
 def sam_file_path(self):
     """Return the output path of ``<external_name>.sam`` in the mappings folder."""
     sam_name = '%s.sam' % self.dataset.external_name
     return PathResolver.output_path_for(self.MAPPINGS_FOLDER, sam_name)
Esempio n. 22
0
 def test_path_resolving_from_bottom_right_with_restrictions(self):
     """``resolve_paths(4, 5, 1)`` returns as many paths as the limit passed in."""
     path_limit = 1
     resolver = PathResolver(PathResolvingTest.get_nodes_mapping())
     resolved = resolver.resolve_paths(4, 5, path_limit)
     self.assertEqual(len(resolved), path_limit)
Esempio n. 23
0
 def db_path(self):
     """Return the output path of the database folder inside the mappings folder."""
     components = (self.MAPPINGS_FOLDER, self.DB_FOLDER)
     return PathResolver.output_path_for(*components)
Esempio n. 24
0
 def _open_logs(self):
     """Open one writable log file per entry of ``self.FILES``.

     For each ``name -> extension`` pair, the file
     ``<external_name>_<name>.<extension>`` is opened for writing at the
     path given by ``PathResolver.results_path_for`` and the handle is
     stored in ``self.logs[name]``. The handles are not closed here.
     """
     for log_name, extension in self.FILES.iteritems():
         file_name = '%s_%s.%s' % (self.external_name, log_name, extension)
         log_path = PathResolver.results_path_for(file_name)
         self.logs[log_name] = open(log_path, 'w')