コード例 #1
0
ファイル: learn_model.py プロジェクト: jbkinney/mpathic
def wrapper(args):

    #validate some of the input arguments
    qc.validate_input_arguments_for_learn_model(
        foreground=args.foreground,background=args.background,alpha=args.penalty,
        modeltype=args.modeltype,learningmethod=args.learningmethod,
        start=args.start,end=args.end,iteration=args.iteration,
        burnin=args.burnin,thin=args.thin,pseudocounts=args.pseudocounts,)

    inloc = io.validate_file_for_reading(args.i) if args.i else sys.stdin
    input_df = io.load_dataset(inloc)
    
    outloc = io.validate_file_for_writing(args.out) if args.out else sys.stdout
    #pdb.set_trace()

    output_df = main(input_df,lm=args.learningmethod,\
        modeltype=args.modeltype,db=args.db_filename,\
        LS_means_std=args.LS_means_std,\
        iteration=args.iteration,\
        burnin=args.burnin,thin=args.thin,start=args.start,end=args.end,\
        runnum=args.runnum,initialize=args.initialize,\
        foreground=args.foreground,background=args.background,\
        alpha=args.penalty,pseudocounts=args.pseudocounts,
        verbose=args.verbose)

    io.write(output_df,outloc)
コード例 #2
0
def wrapper(args):
    T_LibCounts = args.totallibcounts
    T_mRNACounts = args.totalmRNAcounts
    if T_LibCounts <=0 or T_mRNACounts <= 0:
        raise SortSeqError('Counts must be greater than zero')
    model_df = io.load_model(args.model)
    if args.i:
        df = pd.io.parsers.read_csv(args.i,delim_whitespace=True)
    else:
        df = pd.io.parsers.read_csv(sys.stdin,delim_whitespace=True)
    #make sure the library is not already sorted
    if len(utils.get_column_headers(df)) > 0:
         raise SortSeqError('Library already sorted!')
    header = df.columns
    libcounts,expcounts = main(df,model_df,T_LibCounts,T_mRNACounts,start=args.start,end=args.end)
    #add these counts to input dataframe
    lc = pd.Series(libcounts,name='ct_0')
    ec = pd.Series(expcounts,name='ct_1')
    df['ct_0'] = lc
    df['ct_1'] = ec
    df['ct'] = df[['ct_0','ct_1']].sum(axis=1)
    if args.out:
        outloc = open(args.out,'w')
    else:
        outloc = sys.stdout
    pd.set_option('max_colwidth',int(1e8))

    # Validate dataframe for writting
    df = qc.validate_dataset(df,fix=True)
    io.write(df,outloc)
コード例 #3
0
ファイル: simulate_sort.py プロジェクト: jbkinney/mpathic
def wrapper(args):

    try:
        npar = args.noiseparam.strip("[").strip("]").split(",")
    except:
        npar = []
    nbins = args.nbins
    # Run funciton
    if args.i:
        df = pd.io.parsers.read_csv(args.i, delim_whitespace=True, dtype={"seqs": str, "batch": int})
    else:
        df = pd.io.parsers.read_csv(sys.stdin, delim_whitespace=True, dtype={"seqs": str, "batch": int})
    if len(utils.get_column_headers(df)) > 0:
        raise SortSeqError("Library already sorted!")
    model_df = io.load_model(args.model)
    output_df = main(df, model_df, args.noisemodel, npar, nbins, start=args.start, end=args.end)

    if args.out:
        outloc = open(args.out, "w")
    else:
        outloc = sys.stdout
    pd.set_option("max_colwidth", int(1e8))

    # Validate dataframe for writting
    output_df = qc.validate_dataset(output_df, fix=True)
    io.write(output_df, outloc)
コード例 #4
0
ファイル: simulate_library.py プロジェクト: jbkinney/mpathic
def wrapper(args):
    """ Commandline wrapper for main()
    """  
    output_df = main(wtseq=args.wtseq, mutrate=args.mutrate,\
        numseq=args.numseqs,dicttype=args.type,tags=args.tags,\
        tag_length=args.tag_length)
    outloc = io.validate_file_for_writing(args.out) if args.out else sys.stdout
    io.write(output_df,outloc)
コード例 #5
0
ファイル: profile_mut.py プロジェクト: jbkinney/mpathic
def wrapper(args):
    """ Commandline wrapper for main()
    """
    inloc = io.validate_file_for_reading(args.i) if args.i else sys.stdin
    outloc = io.validate_file_for_writing(args.out) if args.out else sys.stdout
    input_df = io.load_dataset(inloc)
    output_df = main(input_df, bin=args.bin, start=args.start, end=args.end, err=args.err)
    io.write(output_df, outloc)
コード例 #6
0
ファイル: evaluate_model.py プロジェクト: jbkinney/mpathic
def wrapper(args):
    inloc = io.validate_file_for_reading(args.i) if args.i else sys.stdin
    dataset_df = io.load_dataset(inloc)
    model_df = io.load_model(args.model)
    output_df = main(dataset_df=dataset_df, model_df=model_df,\
        left=args.left, right=args.right)
    outloc = io.validate_file_for_writing(args.out) if args.out else sys.stdout
    io.write(output_df,outloc,fast=args.fast)
コード例 #7
0
ファイル: test_profile_mut.py プロジェクト: jbkinney/mpathic
    def test_profile_mut_bin_noerr(self):
        """ Test the ability of mpathic.profile_mut to compute mutation rates
        """

        print '\nIn test_profile_mut_bin_noerr_...'
        library_files = glob.glob(self.input_dir+'library_*.txt')
        library_files += glob.glob(self.input_dir+'dataset_*.txt')
        good_bin_num = 2
        bad_bin_num = 5
        for file_name in library_files:
            print '\t%s ='%file_name,
            description = file_name.split('_')[-1].split('.')[0]
            executable = lambda:\
                profile_mut.main(io.load_dataset(file_name),bin=good_bin_num,err=False)
            print '(bin=%d)'%good_bin_num,

            # If bad or library, then profile_mut.main should raise SortSeqError
            if ('_bad' in file_name) or ('library' in file_name):
                try:
                    self.assertRaises(SortSeqError,executable)
                    print 'badtype,',
                except:
                    print 'good (ERROR).'
                    raise

            # If good, then profile_mut.main should produce a valid df
            elif '_good' in file_name:
                try:
                    df = executable()
                    qc.validate_profile_mut(df)
                    out_file = self.output_dir+\
                        'profile_mut_bin_noerr_%s.txt'%description
                    io.write(df,out_file)           # Test writing
                    io.load_profile_mut(out_file)   # Test loading
                    print 'good,',

                except:
                    print 'bad (ERROR).'
                    raise

            # There are no other options
            else:
                raise SortSeqError('Unrecognized class of file_name.')

            # Should always raise an error if bin num is too large
            executable = lambda:\
                profile_mut.main(io.load_dataset(file_name),bin=bad_bin_num)
            print '(bin=%d)'%bad_bin_num,
            try:
                self.assertRaises(SortSeqError,executable)
                print 'badtype.'
            except:
                print 'good (ERROR).'
                raise
コード例 #8
0
ファイル: test_profile_freq.py プロジェクト: irelandb/mpathic
    def test_profile_freq_bincounts(self):
        """ Test the ability of mpathic.profile_freq to count frequencies
        """

        print '\nIn test_profile_freq_bincounts...'
        library_files = glob.glob(self.input_dir + 'library_*.txt')
        library_files += glob.glob(self.input_dir + 'dataset_*.txt')
        good_bin_num = 2
        bad_bin_num = 5
        for file_name in library_files:
            print '\t%s =' % file_name,
            description = file_name.split('_')[-1].split('.')[0]
            executable = lambda:\
                profile_freq.main(io.load_dataset(file_name),bin=good_bin_num)
            print '(bin=%d)' % good_bin_num,

            # If bad or library, then profile_freq.main should raise SortSeqError
            if ('_bad' in file_name) or ('library' in file_name):
                try:
                    self.assertRaises(SortSeqError, executable)
                    print 'badtype,',
                except:
                    print 'good (ERROR).'
                    raise

            # If good, then profile_freq.main should produce a valid df
            elif ('_good' in file_name) or ('dataset' in file_name):
                try:
                    df = executable()
                    qc.validate_profile_freq(df)
                    out_file = self.output_dir+\
                        'profile_freq_bin_%s.txt'%description
                    io.write(df, out_file)
                    io.load_profile_freq(out_file)
                    print 'good,',

                except:
                    print 'bad (ERROR).'
                    raise

            # There are no other options
            else:
                raise SortSeqError('Unrecognized class of file_name.')

            # Should always raise an error if bin num is too large
            executable = lambda:\
                profile_freq.main(io.load_dataset(file_name),bin=bad_bin_num)
            print '(bin=%d)' % bad_bin_num,
            try:
                self.assertRaises(SortSeqError, executable)
                print 'badtype.'
            except:
                print 'good (ERROR).'
                raise
コード例 #9
0
ファイル: test_preprocess.py プロジェクト: irelandb/mpathic
    def test_preprocess(self):
        """ Test the ability of mpathic.preprocess to collate data in multiple sequence files
        """

        print '\nIn test_preprocess...'
        file_names = glob.glob(self.input_dir + 'files_*.txt')

        # Make sure there are files to test
        self.assertTrue(len(file_names) > 0)

        for file_name in file_names:
            print '\t%s =' % file_name,
            description = file_name.split('_')[-1].split('.')[0]

            # If fasta or fastq, assume dna
            if ('fasta' in file_name) or ('fastq' in file_name):
                seq_type = 'dna'
            else:
                seq_type = None

            executable = lambda: preprocess.main(io.load_filelist(file_name),
                                                 indir=self.input_dir,
                                                 seq_type=seq_type)

            # If _good_, then preprocess.main should produce a valid df
            if ('_good' in file_name) or ('_fix' in file_name):
                try:
                    df = executable()
                    qc.validate_dataset(df)
                    out_file = self.output_dir + 'dataset_%s.txt' % description
                    io.write(df, out_file)  # Test write
                    io.load_dataset(out_file)  # Test loading
                    print 'good.'
                except:
                    print 'bad (ERROR).'
                    raise

            # If _bad, then preprocess.main should raise SortSeqError
            elif '_bad' in file_name:
                try:
                    self.assertRaises(SortSeqError, executable)
                    print 'badtype.'
                except:
                    print 'good (ERROR).'
                    raise

            # There are no other options
            else:
                raise SortSeqError('Unrecognized class of file_name.')
コード例 #10
0
ファイル: test_preprocess.py プロジェクト: jbkinney/mpathic
    def test_preprocess(self):
        """ Test the ability of mpathic.preprocess to collate data in multiple sequence files
        """

        print '\nIn test_preprocess...'
        file_names = glob.glob(self.input_dir+'files_*.txt')

        # Make sure there are files to test
        self.assertTrue(len(file_names)>0)

        for file_name in file_names:
            print '\t%s ='%file_name,
            description = file_name.split('_')[-1].split('.')[0]

            # If fasta or fastq, assume dna
            if ('fasta' in file_name) or ('fastq' in file_name):
                seq_type = 'dna'
            else:
                seq_type = None

            executable = lambda: preprocess.main(io.load_filelist(file_name),indir=self.input_dir, seq_type=seq_type)

            # If _good_, then preprocess.main should produce a valid df
            if ('_good' in file_name) or ('_fix' in file_name):
                try:
                    df = executable()
                    qc.validate_dataset(df)
                    out_file = self.output_dir+'dataset_%s.txt'%description
                    io.write(df,out_file)       # Test write
                    io.load_dataset(out_file)   # Test loading
                    print 'good.'
                except:
                    print 'bad (ERROR).'
                    raise

            # If _bad, then preprocess.main should raise SortSeqError
            elif '_bad' in file_name:
                try:
                    self.assertRaises(SortSeqError,executable)
                    print 'badtype.'
                except:
                    print 'good (ERROR).'
                    raise

            # There are no other options
            else:
                raise SortSeqError('Unrecognized class of file_name.')
コード例 #11
0
def wrapper(args):
    """ Commandline wrapper for main()
    """
    inloc = io.validate_file_for_reading(args.i) if args.i else sys.stdin
    outloc = io.validate_file_for_writing(args.out) if args.out else sys.stdout
    if args.bins_df_name:
        bins_df = pd.io.parsers.read_csv(args.bins_df_name,
                                         delim_whitespace=True)
    else:
        bins_df = None
    input_df = io.load_dataset(inloc)
    output_df = main(input_df,
                     bin=args.bin,
                     start=args.start,
                     end=args.end,
                     bins_df=bins_df,
                     pseudocounts=args.pseudocounts,
                     return_profile=args.return_profile)
    io.write(output_df, outloc)
コード例 #12
0
ファイル: validate.py プロジェクト: jbkinney/mpathic
def wrapper(args):
    """ Wrapper for functions io.load_* and io.write
    """  

    # Determine input and output
    inloc = io.validate_file_for_reading(args.i) if args.i else sys.stdin
    outloc = io.validate_file_for_writing(args.out) if args.out else sys.stdout

    try:
        # Get load function corresponding to file type
        func = filetype_to_loadfunc_dict[str(args.type)]

        # Run load function on input
        df = func(inloc)

        # Write df to stdout or to outfile 
        io.write(df,outloc,fast=args.fast)

    except SortSeqError:
        raise
コード例 #13
0
ファイル: preprocess.py プロジェクト: jbkinney/mpathic
def wrapper(args):
    """ Commandline wrapper for main()
    """  

    inloc = io.validate_file_for_reading(args.i) if args.i else sys.stdin
    outloc = io.validate_file_for_writing(args.out) if args.out else sys.stdout
    
    # Get filelist
    filelist_df = io.load_filelist(inloc)
    inloc.close()

    # Get tagkeys dataframe if provided
    if args.tagkeys:
        tagloc = io.validate_file_for_reading(args.tagkeys) 
        tags_df = io.load_tagkey(tagloc)
        tagloc.close()
    else:
        tags_df = None
    
    output_df = main(filelist_df,tags_df=tags_df,seq_type=args.seqtype)
    io.write(output_df,outloc,fast=args.fast)
コード例 #14
0
ファイル: test_io.py プロジェクト: jbkinney/mpathic
    def generic_test(self,test_name,function_str,file_names,allbad=False):
        """ 
        Standardizes tests for different dataframe loading functions.
        The argument function_str must have "%s" where file_name goes. 
        Example:
        generic_test('test_io_load_tagkey','io.load_tagkey("%s"),file_names)'
        """
        print '\nIn %s...'%test_name   

        # Make sure there are files to test
        self.assertTrue(len(file_names)>0)

        # For each file, run test
        for file_name in file_names:
            executable = lambda: eval(function_str%file_name)
            print '\t%s ='%file_name,
            if not allbad and any([c in file_name for c in \
                ['_good','_fix','_badio','_badtype']]):
                try:
                    df = executable()
                    self.assertTrue(df.shape[0]>=1)
                    # Write df
                    base_filename = file_name.split('/')[-1]
                    io.write(df,self.output_dir+'loaded_'+base_filename)
                    print 'good.'
                except:
                    print 'bad (ERROR).'
                    raise

            elif allbad or ('_bad' in file_name):
                try:
                    self.assertRaises(SortSeqError,executable)
                    print 'bad.'
                except:
                    print 'good (ERROR).'
                    raise
            else:
                print 'what should I expect? (ERROR)'
                raise
        print '\tDone.'
コード例 #15
0
ファイル: test_io.py プロジェクト: irelandb/mpathic
    def generic_test(self, test_name, function_str, file_names, allbad=False):
        """ 
        Standardizes tests for different dataframe loading functions.
        The argument function_str must have "%s" where file_name goes. 
        Example:
        generic_test('test_io_load_tagkey','io.load_tagkey("%s"),file_names)'
        """
        print '\nIn %s...' % test_name

        # Make sure there are files to test
        self.assertTrue(len(file_names) > 0)

        # For each file, run test
        for file_name in file_names:
            executable = lambda: eval(function_str % file_name)
            print '\t%s =' % file_name,
            if not allbad and any([c in file_name for c in \
                ['_good','_fix','_badio','_badtype']]):
                try:
                    df = executable()
                    self.assertTrue(df.shape[0] >= 1)
                    # Write df
                    base_filename = file_name.split('/')[-1]
                    io.write(df, self.output_dir + 'loaded_' + base_filename)
                    print 'good.'
                except:
                    print 'bad (ERROR).'
                    raise

            elif allbad or ('_bad' in file_name):
                try:
                    self.assertRaises(SortSeqError, executable)
                    print 'bad.'
                except:
                    print 'good (ERROR).'
                    raise
            else:
                print 'what should I expect? (ERROR)'
                raise
        print '\tDone.'
コード例 #16
0
ファイル: test_profile_info.py プロジェクト: jbkinney/mpathic
    def test_profile_info(self):
        """ Test the ability of mpathic.profile_info to compute mutation rates based on total count values
        """

        print '\nIn test_profile_info...'
        file_names = glob.glob(self.input_dir+'dataset_*.txt')
        for err in [True,False]:
            for file_name in file_names:
                print '\t%s, err=%s ='%(file_name,str(err)),
                description = file_name.split('_')[-1].split('.')[0]
                executable = lambda: \
                    profile_info.main(io.load_dataset(file_name),err=err)

                # If good, then profile_info.main should produce a valid df
                if '_good' in file_name:
                    try:
                        df = executable()
                        qc.validate_profile_info(df)
                        out_file = self.output_dir+\
                            'profile_info_%s_err_%s.txt'%(description,str(err))
                        io.write(df,out_file)
                        io.load_profile_info(out_file)
                        print 'good.'
                    except:
                        print 'bad (ERROR).'
                        raise

                # If bad, then profile_info.main should raise SortSeqError
                elif '_bad' in file_name:
                    try:
                        self.assertRaises(SortSeqError,executable)
                        print 'badtype.'
                    except:
                        print 'good (ERROR).'
                        raise

                # There are no other options
                else:
                    raise SortSeqError('Unrecognized class of file_name.')
コード例 #17
0
    def test_profile_info(self):
        """ Test the ability of mpathic.profile_info to compute mutation rates based on total count values
        """

        print '\nIn test_profile_info...'
        file_names = glob.glob(self.input_dir + 'dataset_*.txt')
        for err in [True, False]:
            for file_name in file_names:
                print '\t%s, err=%s =' % (file_name, str(err)),
                description = file_name.split('_')[-1].split('.')[0]
                executable = lambda: \
                    profile_info.main(io.load_dataset(file_name),err=err)

                # If good, then profile_info.main should produce a valid df
                if '_good' in file_name:
                    try:
                        df = executable()
                        qc.validate_profile_info(df)
                        out_file = self.output_dir+\
                            'profile_info_%s_err_%s.txt'%(description,str(err))
                        io.write(df, out_file)
                        io.load_profile_info(out_file)
                        print 'good.'
                    except:
                        print 'bad (ERROR).'
                        raise

                # If bad, then profile_info.main should raise SortSeqError
                elif '_bad' in file_name:
                    try:
                        self.assertRaises(SortSeqError, executable)
                        print 'badtype.'
                    except:
                        print 'good (ERROR).'
                        raise

                # There are no other options
                else:
                    raise SortSeqError('Unrecognized class of file_name.')
コード例 #18
0
ファイル: test_profile_freq.py プロジェクト: jbkinney/mpathic
    def test_profile_freq_totalcounts(self):
        """ Test the ability of mpathic.profile_freq to compute frequencies based on total count values
        """

        print '\nIn test_profile_freq_totalcounts...'
        library_files = glob.glob(self.input_dir+'library_*.txt')
        library_files += glob.glob(self.input_dir+'dataset_*.txt')
        for file_name in library_files:
            print '\t%s ='%file_name,
            description = file_name.split('_')[-1].split('.')[0]
            executable = lambda: profile_freq.main(io.load_dataset(file_name))

            # If good, then profile_freq.main should produce a valid df
            if '_good' in file_name:
                try:
                    df = executable()
                    qc.validate_profile_freq(df)
                    out_file = self.output_dir+\
                        'profile_freq_total_%s.txt'%description
                    io.write(df,out_file)
                    io.load_profile_freq(out_file)
                    print 'good.'
                except:
                    print 'bad (ERROR).'
                    raise

            # If bad, then profile_freq.main should raise SortSeqError
            elif '_bad' in file_name:
                try:
                    self.assertRaises(SortSeqError,executable)
                    print 'badtype.'
                except:
                    print 'good (ERROR).'
                    raise

            # There are no other options
            else:
                raise SortSeqError('Unrecognized class of file_name.')
コード例 #19
0
ファイル: test_profile_freq.py プロジェクト: irelandb/mpathic
    def test_profile_freq_totalcounts(self):
        """ Test the ability of mpathic.profile_freq to compute frequencies based on total count values
        """

        print '\nIn test_profile_freq_totalcounts...'
        library_files = glob.glob(self.input_dir + 'library_*.txt')
        library_files += glob.glob(self.input_dir + 'dataset_*.txt')
        for file_name in library_files:
            print '\t%s =' % file_name,
            description = file_name.split('_')[-1].split('.')[0]
            executable = lambda: profile_freq.main(io.load_dataset(file_name))

            # If good, then profile_freq.main should produce a valid df
            if '_good' in file_name:
                try:
                    df = executable()
                    qc.validate_profile_freq(df)
                    out_file = self.output_dir+\
                        'profile_freq_total_%s.txt'%description
                    io.write(df, out_file)
                    io.load_profile_freq(out_file)
                    print 'good.'
                except:
                    print 'bad (ERROR).'
                    raise

            # If bad, then profile_freq.main should raise SortSeqError
            elif '_bad' in file_name:
                try:
                    self.assertRaises(SortSeqError, executable)
                    print 'badtype.'
                except:
                    print 'good (ERROR).'
                    raise

            # There are no other options
            else:
                raise SortSeqError('Unrecognized class of file_name.')
コード例 #20
0
ファイル: test_profile_freq.py プロジェクト: irelandb/mpathic
    def test_profile_freq_seqslicing(self):
        """ Test the ability of mpathic.profile_freq to slice sequences properly, and to raise the correct errors
        """

        print '\nIn test_profile_freq_seqslicing...'
        library_files = glob.glob(self.input_dir + 'library_*.txt')
        library_files += glob.glob(self.input_dir + 'dataset_*.txt')
        for file_name in library_files:
            print '\t%s =' % file_name,
            description = file_name.split('_')[-1].split('.')[0]
            executable_good1 =\
                lambda: profile_freq.main(io.load_dataset(file_name),\
                    start=2,end=10)
            executable_good2 =\
                lambda: profile_freq.main(io.load_dataset(file_name),\
                    start=2)
            executable_good3 =\
                lambda: profile_freq.main(io.load_dataset(file_name),\
                    end=2)
            executable_nopro =\
                lambda: profile_freq.main(io.load_dataset(file_name),\
                    start=50,end=60)
            executable_bad1 =\
                lambda: profile_freq.main(io.load_dataset(file_name),\
                    start=-1)
            executable_bad2 =\
                lambda: profile_freq.main(io.load_dataset(file_name),\
                    end=100)
            executable_bad3 =\
                lambda: profile_freq.main(io.load_dataset(file_name),\
                    start=20,end=10)

            # If good, then sequences will be valid
            if 'good' in file_name:
                try:
                    df = executable_good1()
                    io.write(df,self.output_dir+\
                        'profile_freq_splice2-10_%s.txt'%description)
                    executable_good2()
                    executable_good3()
                    self.assertRaises(SortSeqError, executable_bad1)
                    self.assertRaises(SortSeqError, executable_bad2)
                    self.assertRaises(SortSeqError, executable_bad3)
                    if '_pro' in file_name:
                        self.assertRaises(SortSeqError, executable_nopro)
                    else:
                        df = executable_nopro()
                    print 'ok.'
                except:
                    print 'ok (ERROR).'
                    raise

            # If bad, then profile_freq.main should raise SortSeqError
            elif '_bad' in file_name:
                try:
                    self.assertRaises(SortSeqError, executable_good1)
                    self.assertRaises(SortSeqError, executable_good2)
                    self.assertRaises(SortSeqError, executable_good3)
                    self.assertRaises(SortSeqError, executable_nopro)
                    self.assertRaises(SortSeqError, executable_bad1)
                    self.assertRaises(SortSeqError, executable_bad2)
                    self.assertRaises(SortSeqError, executable_bad3)
                    print 'ok.'
                except:
                    print 'not ok (ERROR).'
                    raise

            # There are no other options
            else:
                raise SortSeqError('Unrecognized class of file_name.')
コード例 #21
0
ファイル: test_profile_mut.py プロジェクト: jbkinney/mpathic
    def test_profile_mut_seqslicing(self):
        """ Test the ability of mpathic.profile_mut to slice sequences properly, and to raise the correct errors
        """

        print '\nIn test_profile_mut_seqslicing...'
        library_files = glob.glob(self.input_dir+'library_*.txt')
        library_files += glob.glob(self.input_dir+'dataset_*.txt')
        for file_name in library_files:
            print '\t%s ='%file_name,
            description = file_name.split('_')[-1].split('.')[0]
            executable_good1 =\
                lambda: profile_mut.main(io.load_dataset(file_name),\
                    start=2,end=10)
            executable_good2 =\
                lambda: profile_mut.main(io.load_dataset(file_name),\
                    end=10)
            executable_good3 =\
                lambda: profile_mut.main(io.load_dataset(file_name),\
                    end=2)
            executable_nopro =\
                lambda: profile_mut.main(io.load_dataset(file_name),\
                    start=50,end=60)
            executable_bad1 =\
                lambda: profile_mut.main(io.load_dataset(file_name),\
                    start=-1)
            executable_bad2 =\
                lambda: profile_mut.main(io.load_dataset(file_name),\
                    end=100)
            executable_bad3 =\
                lambda: profile_mut.main(io.load_dataset(file_name),\
                    start=20,end=10)

            # If good, then sequences will be valid
            if '_good' in file_name:
                try:
                    df = executable_good1()
                    io.write(df,self.output_dir+\
                        'profile_mut_splice2-10_%s.txt'%description)
                    executable_good2()
                    executable_good3()
                    self.assertRaises(SortSeqError,executable_bad1)
                    self.assertRaises(SortSeqError,executable_bad2)
                    self.assertRaises(SortSeqError,executable_bad3)
                    if '_pro' in file_name:
                        self.assertRaises(SortSeqError,executable_nopro)
                    else:
                        df = executable_nopro()
                    print 'ok.'
                except:
                    print 'ok (ERROR).'
                    raise

            # If bad, then profile_mut.main should raise SortSeqError
            elif '_bad' in file_name:
                try:
                    self.assertRaises(SortSeqError,executable_good1)
                    self.assertRaises(SortSeqError,executable_good2)
                    self.assertRaises(SortSeqError,executable_good3)
                    self.assertRaises(SortSeqError,executable_nopro)
                    self.assertRaises(SortSeqError,executable_bad1)
                    self.assertRaises(SortSeqError,executable_bad2)
                    self.assertRaises(SortSeqError,executable_bad3)
                    print 'ok.'
                except:
                    print 'not ok (ERROR).'
                    raise

            # There are no other options
            else:
                raise SortSeqError('Unrecognized class of file_name.')
コード例 #22
0
ファイル: scan_model.py プロジェクト: jbkinney/mpathic
def wrapper(args):
    """ Wrapper for function for scan_model.main()
    """

    # Prepare input to main
    model_df = io.load_model(args.model)
    seqtype, modeltype = qc.get_model_type(model_df)
    L = model_df.shape[0]
    if modeltype == "NBR":
        L += 1

    chunksize = args.chunksize
    if not chunksize > 0:
        raise SortSeqError("chunksize=%d must be positive" % chunksize)

    if args.numsites <= 0:
        raise SortSeqError("numsites=%d must be positive." % args.numsites)

    if args.i and args.seq:
        raise SortSeqError("Cannot use flags -i and -s simultaneously.")

    # If sequence is provided manually
    if args.seq:
        pos_offset = 0
        contig_str = args.seq

        # Add a bit on end if circular
        if args.circular:
            contig_str += contig_str[: L - 1]

        contig_list = [(contig_str, "manual", pos_offset)]

    # Otherwise, read sequence from FASTA file
    else:
        contig_list = []
        inloc = io.validate_file_for_reading(args.i) if args.i else sys.stdin
        for i, record in enumerate(SeqIO.parse(inloc, "fasta")):
            name = record.name if record.name else "contig_%d" % i

            # Split contig up into chunk)size bits
            full_contig_str = str(record.seq)

            # Add a bit on end if circular
            if args.circular:
                full_contig_str += full_contig_str[: L - 1]

            # Define chunks containing chunksize sites
            start = 0
            end = start + chunksize + L - 1
            while end < len(full_contig_str):
                contig_str = full_contig_str[start:end]
                contig_list.append((contig_str, name, start))
                start += chunksize
                end = start + chunksize + L - 1
            contig_str = full_contig_str[start:]
            contig_list.append((contig_str, name, start))

        if len(contig_list) == 0:
            raise SortSeqError("No input sequences to read.")

    # Compute results
    outloc = io.validate_file_for_writing(args.out) if args.out else sys.stdout
    output_df = main(model_df, contig_list, numsites=args.numsites, verbose=args.verbose)

    # Write df to stdout or to outfile
    io.write(output_df, outloc, fast=args.fast)