Example #1
import sys

import leverage_efficiency.base


def main():
    # Get the name of the config file
    config_file = leverage_efficiency.base.get_config_filename(sys.argv)

    # Extract the data from source data folder into common format
    import extract
    extract.main(config_file)

    # Update data with most recent values (optional)
    #import update       # This doesn't connect to the rest of the pipeline yet
    #update.main(config_file)

    # Calculate derived quantities like returns for input into calculations
    import transform
    transform.main(config_file)

    # Perform leverage efficiency calculations
    import analysis
    analysis.main(config_file)

    # Create figures
    import plots
    plots.main(config_file)

    # Create exact figures used in the paper
    import paper_plots
    paper_plots.main(config_file)

    # Create figures used in the EE lecture notes
    import lecture_plots
    lecture_plots.main(config_file)
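
Every stage in this pipeline exposes the same main(config_file) entry point. A minimal sketch of the same pattern using dynamic imports (the stage list is illustrative, taken from the calls above):

import importlib

def run_pipeline(config_file, stages=('extract', 'transform', 'analysis', 'plots')):
    # Import each stage lazily, as the original does, and pass it the shared config.
    for stage in stages:
        importlib.import_module(stage).main(config_file)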
Example #2
import os
from pathlib import Path

import analysis


def test_data_is_unchanged():
    # Record the file's metadata, run the pipeline, then check nothing changed.
    fp1 = Path("data/raw_data.csv")
    d1 = os.stat(fp1)
    analysis.main()
    fp2 = Path("data/raw_data.csv")
    d2 = os.stat(fp2)
    assert (d1.st_mode == d2.st_mode and d1.st_ino == d2.st_ino
            and d1.st_dev == d2.st_dev and d1.st_nlink == d2.st_nlink
            and d1.st_uid == d2.st_uid and d1.st_gid == d2.st_gid
            and d1.st_size == d2.st_size and d1.st_mtime == d2.st_mtime
            and d1.st_ctime == d2.st_ctime)
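
A content hash catches any byte-level change with less room for copy-paste slips, though it ignores metadata such as timestamps. A minimal sketch, assuming the file fits in memory:

import hashlib
from pathlib import Path

import analysis

def test_data_is_unchanged_by_hash():
    # Identical bytes give identical digests, so any content modification is caught.
    raw = Path("data/raw_data.csv")
    before = hashlib.sha256(raw.read_bytes()).hexdigest()
    analysis.main()
    assert hashlib.sha256(raw.read_bytes()).hexdigest() == before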
Example #3
import time

import analysis
import config
import evaluate
import generate_scores
import rank_answers


def main():
    print(config.SCORE_TYPE)

    for experiment in config.EXPERIMENTS:
        now = time.time()
        print("\nEXPERIMENT: {}\n".format(experiment))
        generate_scores.main(config.SHARDS[experiment], experiment)
        rank_answers.main(experiment)
        evaluate.main(experiment)
        analysis.main(experiment)
        lstring = 'experiment: {}\ntime = {} sec'
        print(lstring.format(str(experiment), str(time.time() - now)))
    print("Done!")
Example #4
    def map_and_analyze(self, eqfil=None):
        if self.mapped is None:
            logger.debug('Mapping disabled.')
        elif self.mapped is True:
            logger.debug('is already mapped (skipping)!')
            return True
        elif self.mapped is False:
            with tools.cd(self.path):
                if eqfil is None:
                    self.mapped = analysis.main(self.map_settings, eqfil)
                else:
                    analysis.main(self.map_settings, eqfil)
        else:
            # self.mapped is expected to be None, True or False only.
            raise ValueError('unexpected value for self.mapped: %r' % self.mapped)
Example #5
def test_analysis_main(tmpdir, analysis, module, symptoms, hce, cause_list,
                       resample_test, subset):
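    # Minimal settings keep this smoke test fast: one resample, two CV splits,
    # a quarter of the data held out, and a single holdout run.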
    kwargs = {
        'clf': 'random',
        'analysis': analysis,
        'module': module,
        'symptoms': symptoms,
        'hce': hce,
        'cause_list': cause_list,
        'resample_test': resample_test,
        'resample_size': 1,
        'subset': subset,
        'n_splits': 2,
        'test_size': 0.25,
        'holdout_n': 1,
        'outdir': tmpdir.strpath
    }
    main(**kwargs)
Example #6
import datetime
import os

import pandas as pd


def main():

    path = '/Users/tinghai/Learning/GuanggaoData'
    os.chdir(path + '/source')

    import analysis as an
    an.main()

    os.chdir(path)

    # Load the four base submissions and blend them; the weights sum to 1.0.
    lightGBM = pd.read_csv('./result/submit_lightGBM.csv', header=0, sep=' ')
    xgboost = pd.read_csv('./result/submit_XGBoost.csv', header=0, sep=' ')
    lightGBM_xgboost = pd.read_csv('./result_fusion/submit_construct_lightGBM_predict_XGBoost.csv', header=0, sep=' ')
    xgboost_lightGBM = pd.read_csv('./result_fusion/submit_construct_XGBoost_predict_lightGBM.csv', header=0, sep=' ')

    result = (0.25 * xgboost.iloc[:, 1] + 0.25 * lightGBM.iloc[:, 1]
              + 0.35 * lightGBM_xgboost.iloc[:, 1] + 0.15 * xgboost_lightGBM.iloc[:, 1])
    result2 = pd.concat([lightGBM.iloc[:, 0], pd.DataFrame(result)], axis=1)
    result2.to_csv('./result/submit_integrate_' + datetime.datetime.now().strftime('%Y%m%d_%H%M%S') + '.txt',
                   index=False, index_label=None, header=['instance_id', 'predicted_score'], sep=' ')
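
The blend above is a fixed-weight linear ensemble over four model outputs. The same idea as a generic helper, a hedged sketch (the helper name is illustrative, not from the source):

def blend(predictions, weights):
    # Weighted sum of aligned prediction columns; the weights should sum to 1.
    assert abs(sum(weights) - 1.0) < 1e-9
    return sum(w * p for w, p in zip(weights, predictions))

# e.g. blend([xgboost.iloc[:, 1], lightGBM.iloc[:, 1]], [0.5, 0.5])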
Example #7
    def respond(self, strInput):
        if strInput.endswith('.pdf'):
            extract.extract_file(strInput, 1)
            analysis.main(strInput)
            answer = 'Your analysis is ready and saved in the parent directory!!'
            return answer

        elif strInput.lower() in ['exit', 'goodbye']:
            answer = self.findmatch(strInput)
            return answer

        else:
            answer = self.findmatch(strInput)

        if answer == '':
            self.saveUnknownInput(strInput)
            return self.listen()
        else:
            return answer
Example #8
import analysis
import compile
import mem_analysis
import reg_analysis


def run(directory, source, analysis_types):

    print('Running WIICA')
    inst_results = {}
    mem_results = {}

    compile.main(directory, source)
    inst_results = analysis.main(directory, source, analysis_types)
    if 'memory' in analysis_types:
        mem_results = mem_analysis.main(directory, source)
    # Merge the per-instruction and memory results (Python 3 dict unpacking).
    all_results = {**inst_results, **mem_results}
    if 'register' in analysis_types:
        reg_analysis.main(directory, source)

    return all_results
Example #9
import analysis
import compile
import mem_analysis
import reg_analysis


def run(directory, source, analysis_types):

    print("Running WIICA")
    inst_results = {}
    mem_results = {}

    compile.main(directory, source)
    inst_results = analysis.main(directory, source, analysis_types)
    if "memory" in analysis_types:
        mem_results = mem_analysis.main(directory, source)
    all_results = {**inst_results, **mem_results}
    if "register" in analysis_types:
        reg_analysis.main(directory, source)

    return all_results
Example #10
import analysis
import compile
import mem_analysis
import process_trace
import reg_analysis


def run(directory, kernel, source, arguments, analysis_types):

    print('Running WIICA')
    inst_results = {}
    mem_results = {}

    # 'non' is the sentinel for "no arguments".
    if arguments[0] == 'non':
        arguments = []

    compile.main(directory, kernel, source, arguments)
    process_trace.main(directory, kernel)
    inst_results = analysis.main(directory, kernel, analysis_types)
    if 'memory' in analysis_types:
        mem_results = mem_analysis.main(directory, kernel)
    all_results = {**inst_results, **mem_results}
    if 'register' in analysis_types:
        reg_analysis.main(directory, kernel)

    return all_results
Example #11
    def test_smoke_cmd_synthesis(cls):
        fname = filenames[filename_totest]  # Just with one file for smoke test

        import analysis
        import synthesis
        analysis.main([
            'test/' + fname, '--f0_min', '75', '--f0_max', '500', '--f0',
            'test/' + fname.replace('.wav', '.f0'), '--spec',
            'test/' + fname.replace('.wav', '.spec'), '--nm',
            'test/' + fname.replace('.wav', '.nm')
        ])
        synthesis.main([
            'test/' + fname.replace('.wav', '.resynth.wav'), '--fs', '16000',
            '--f0', 'test/' + fname.replace('.wav', '.f0'), '--spec',
            'test/' + fname.replace('.wav', '.spec')
        ])
        synthesis.main([
            'test/' + fname.replace('.wav', '.resynth.wav'), '--fs', '16000',
            '--f0', 'test/' + fname.replace('.wav', '.f0'), '--spec',
            'test/' + fname.replace('.wav', '.spec'), '--nm',
            'test/' + fname.replace('.wav', '.nm')
        ])
        # synthesis.main(['test/'+fname.replace('.wav','.resynth.wav'), '--fs', '16000', '--f0', 'test/'+fname.replace('.wav','.f0'), '--spec', 'test/'+fname.replace('.wav','.spec'), '--pdd', 'test/'+fname.replace('.wav','.pdd')])

        analysis.main([
            'test/' + fname, '--f0_min', '75', '--f0_max', '200', '--f0_log',
            '--f0', 'test/' + fname.replace('.wav', '.lf0'), '--spec',
            'test/' + fname.replace('.wav', '.spec')
        ])
        synthesis.main([
            'test/' + fname.replace('.wav', '.resynth.wav'), '--fs', '16000',
            '--logf0', 'test/' + fname.replace('.wav', '.lf0'), '--spec',
            'test/' + fname.replace('.wav', '.spec')
        ])

        analysis.main([
            'test/' + fname, '--f0_log', '--f0',
            'test/' + fname.replace('.wav', '.lf0'), '--spec_nbfwbnds', '65',
            '--spec', 'test/' + fname.replace('.wav', '.fwlspec'),
            '--nm_nbfwbnds', '33', '--nm',
            'test/' + fname.replace('.wav', '.fwnm')
        ])
        synthesis.main([
            'test/' + fname.replace('.wav', '.resynth.wav'), '--fs', '16000',
            '--logf0', 'test/' + fname.replace('.wav', '.lf0'), '--fwlspec',
            'test/' + fname.replace('.wav', '.fwlspec'), '--fwnm',
            'test/' + fname.replace('.wav', '.fwnm')
        ])
Example #12
    def Clicked(self, event):

        text = self.content.get()
        if text:
            self.content.set(text)
            self.conversation = "you: " + text + "\n"
            self.textconversation.insert(tk.END, self.conversation)
            self.textconversation.see(tk.END)
            self.textentry.delete(0, tk.END)

        if text.endswith(".pdf"):
            self.file = text

        if 'outlier' in text:
            if self.file == '':
                self.conversation = "bot: Please specify the file name\n"
            else:
                outliers = analysis.main(self.file, True)
                self.conversation = "bot: The outliers found in the data are:\n"
                for i, k in enumerate(outliers):
                    self.conversation += '\t{}. Year: {}\n'.format(i + 1, k[1])
                    for j in k[0].items():
                        self.conversation += '\t {} : {}\n'.format(j[0], j[1])
            self.content.set(text)
            self.textconversation.insert(tk.END, self.conversation)
            self.textconversation.see(tk.END)
            self.textentry.delete(0, tk.END)

        else:
            respond = self.bot.respond(text)
            self.conversation = "bot: " + respond + "\n"
            self.content.set(text)
            self.textconversation.insert(tk.END, self.conversation)
            self.textconversation.see(tk.END)
            self.textentry.delete(0, tk.END)
Example #13
import analysis
import compile
import mem_analysis
import process_trace
import reg_analysis


def run(directory, kernel, source, arguments, analysis_types):

    print('Running WIICA')
    inst_results = {}
    mem_results = {}

    if arguments[0] == 'non':
        arguments = []

    compile.main(directory, kernel, source, arguments)
    process_trace.main(directory, kernel)
    inst_results = analysis.main(directory, kernel, analysis_types)
    if 'memory' in analysis_types:
        mem_results = mem_analysis.main(directory, kernel)
    all_results = {**inst_results, **mem_results}
    if 'register' in analysis_types:
        reg_analysis.main(directory, kernel)

    return all_results
Example #14
import sys
from io import StringIO
from time import time

import numpy as np

import analysis
import dist
import functions
import starformation


def main(n=150000, quiet=False):
    """main(n=150000, quiet=False)

    This script produces a grid of expected numbers of stars according to the selection
    criteria of Yusef-Zadeh et al. 2009, The Astrophysical Journal, 702, 178-225.
    The grid is in av for visual extinction, apera for aperture size and age for the
    maximum age of the star formation.

    Parameters
    ----------
    n       integer:
        number of stars to be sampled per parameter set
    quiet   boolean:
        if True, suppresses all standard output

    Returns
    ----------
    A number of FITS files with the sampled stars for the different parameters
    specified in this file.
    Standard output is used to report progress; it prints the parameter set to be
    processed next and the completeness of the script as
        AV aperture_size maxage completeness ETA
    where ETA is the estimated time to completion in seconds, based on the last
    operation.
    """
    t0 = time()  # start timing

    if quiet:
        output_stream = StringIO()
    else:
        output_stream = sys.stdout


    print(t0, file=output_stream)

    sfr = .01
    # star mass function
    kroupa = np.vectorize(functions.kroupa)
    mf = dist.Distribution(kroupa, .1, 50.)

    # star formation history
    constant_sfr = np.vectorize(functions.constant_sfr)

    ages = np.logspace(5, 7, 7)
    sf = [dist.Distribution(constant_sfr, 1000., ages[i]) for i in range(len(ages))]
    # sfr = [150000*mf.mean()/(ages[i]-1000.) for i in range(len(ages))]

    t1 = time()  # finished reading the distributions
    print(t1, file=output_stream)


    # setting up model data
    aperas = np.logspace(2, 5, 4)
    avs = np.linspace(10.0, 50.0, 5)
    l = 1
    tmpold, tmpnew = 0., time()
    parameters = []
    for i in range(len(avs)):
        for j in range(len(aperas)):
            for k in range(len(ages)):
                tmpold, tmpnew = tmpnew, time()
                starformation.main(massfunction=mf, starformationhistory=sf[k],
                                   A_v=avs[i], sfr=n, apera=aperas[j], maxage=ages[k],
                                   appendix="%s_%03d_%06d_%09d" % ('sim', avs[i], aperas[j], ages[k]),
                                   quiet=True, precise=False)
                # Progress: parameter set, fraction done, ETA from the last iteration's time.
                print(avs[i], aperas[j], ages[k], l / len(avs) / len(aperas) / len(ages),
                      (len(avs) * len(aperas) * len(ages) - l) * (tmpnew - tmpold),
                      file=output_stream)
                l = l + 1

                parameters.append([avs[i], aperas[j], ages[k]])

    t2 = time()  # end of simulation
    print(t2, t1, t2 - t1)
    
    print('number of simulations run: %s' % l, file=output_stream)
    head = ['#', 'AV', 'Aperture_size', 'Age']
    f = open('out/__head', 'w')
    f.write(','.join(head) + '\n')
    np.savetxt(f, parameters)
    f.close()

    t3 = time()  # end of saving data

    analysis.main('out')
    print('analysis complete', file=output_stream)

    t4 = time()  # end of analysing data



    print('starting script at %f' % t0, file=output_stream)
    print('initializing       %f' % (t1 - t0), file=output_stream)
    print('running simulation %f' % (t2 - t1), file=output_stream)
    print('writing data       %f' % (t3 - t2), file=output_stream)
    print('analysing data     %f' % (t4 - t3), file=output_stream)
    print('________________________', file=output_stream)
    print('total runtime      %f' % (t4 - t0), file=output_stream)
    print('finishing script   %f' % t4, file=output_stream)
Example #15
import analysis

path_to_data = "/global/cscratch1/sd/zarija/4096/z05.h5"
path_to_catalog = '/global/cscratch1/sd/zarija/4096/catalog_z05_iso138.txt'
output_mass_frac = "./4096z05/mass_fraction.txt"
output_WHIM_data = "./4096z05/WHIM_data.txt"

analysis.main(path_to_data, path_to_catalog, output_mass_frac,
              output_WHIM_data)
Example #16
import pandas as pd
import json
import urllib.request
import numpy as np
import pymongo

import districtlist
import analysis
pd.set_option('mode.chained_assignment', None)

conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)
db = client.analysis
collection = db.districtdata

dict_data = {}
dict_value = []
print('data retrieval in progress ...')
for data in districtlist.distlist:

    # Census Profile 2016 REST endpoint for one dissemination geography.
    url = f'https://www12.statcan.gc.ca/rest/census-recensement/CPR2016.json?lang=E&dguid={data}&topic=10&theme=5&notes=0'
    analysis.main(url)
    df_copy = analysis.languages_df.copy()
    df_copy.drop(columns=['comment'], inplace=True)
    df_copy.set_index(['rows'], inplace=True)
    records = json.loads(df_copy.to_json()).values()
    collection.insert_many(records)

print('data retrieval done.')
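
A quick sanity check after the loop, a sketch using the collection handle defined above:

# Every inserted record should now be queryable.
print(collection.count_documents({}))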
Example #17
            language.append(lan)
            count.append(0)
        total += 1

for l in language:
    i = language.index(l)
    count[i] = count[i] / float(total)

# for i in language:
#     ind = language.index(i)
#     if ind > 0:
#         count[ind] += count[ind-1]

# Split the range [0, 1] into cumulative bins, one per language.
ranges = [count[0]]
for i in range(1, len(count)):
    ranges.append(ranges[i - 1] + count[i])

ranges[-1] = 1  # guard against floating-point drift in the last bin

with open("dev.txt") as f2:
    with open("baseline.txt", "w") as f:
        for l in f2.readlines():
            number = random.random()
            i = 0
            while number > ranges[i]:
                i += 1
            f.write(language[i] + '\n')

analysis.main(resultsFile="baseline.txt")
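
The cumulative-range loop above re-implements weighted sampling, which random.choices does natively. A minimal sketch of the same baseline writer using the frequencies already computed:

import random

with open("dev.txt") as f2, open("baseline.txt", "w") as f:
    for _ in f2:
        # Draw one language per input line, weighted by corpus frequency.
        f.write(random.choices(language, weights=count)[0] + '\n')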
Example #18
import analysis

if __name__ == '__main__':
    analysis.main()
Example #19
                "Other", "Chinese", "Tibeto-Burman", "Tai-Kadai", "Turkic"]
comments = ["Electoral Districts", "Link to Statcan information", "Single Responses: Mother tongue",
            "None", "None", "None", "None", "mainly Arabic, Hebrew, Somali",
            "includes Khmer, Vietnamese", "includes Tagalog (Filipino)", "includes Tamil",
            "English and French excluded", "includes Hindi, Urdu...", "mainly Farsi and Kurdish",
            "Japanese, Korean, Mongolic", "Niger-Congo, Nilo-Saharan and Creole",
            "Sign languages and other languages", "mainly Mandarin and Cantonese", "None",
            "mainly Thai and Lao", "None"]

language_dict = {}
language_dict["rows"] = row_names
language_dict["comment"] = comments

print('data retrieval in progress ...')
for data in gd.provlist:

    url = f'https://www12.statcan.gc.ca/rest/census-recensement/CPR2016.json?lang=E&dguid={data}&topic=10&theme=5&notes=0'
    district, languages = analysis.main(url)
    language_dict[district] = languages
    #df_copy = analysis.languages_df.copy()
    #df_copy.drop (['comment'], 1, inplace=True)
    #df_copy.set_index(['rows'], inplace=True)
    #records = json.loads(df_copy.to_json()).values()
    #collection.insert_many(records)
    


    ### print(len(languages), len(comments), len(row_names))

# When the dictionary is complete, a new dataframe can be put together:
languages_df = pd.DataFrame.from_dict(language_dict)

# languages_df
Example #20
            count.append(0)
        total += 1

for l in language:
    i = language.index(l)
    count[i] = count[i] / float(total)

# for i in language:
#     ind = language.index(i)
#     if ind > 0:
#         count[ind] += count[ind-1]

# Split the range [0, 1] into cumulative bins, one per language.
ranges = [count[0]]
for i in range(1, len(count)):
    ranges.append(ranges[i - 1] + count[i])

ranges[-1] = 1  # guard against floating-point drift in the last bin

with open("dev.txt") as f2:
    with open("baseline.txt", "w") as f:
        for l in f2.readlines():
            number = random.random()
            i = 0
            while number > ranges[i]:
                i += 1
            f.write(language[i] + '\n')


analysis.main(resultsFile="baseline.txt")
Example #21
    def test_smoke_cmd_analysis(cls):
        fname = filenames[filename_totest]  # Just with one file for smoke test

        import analysis
        analysis.main(['test/' + fname])
        analysis.main(
            ['test/' + fname, '--f0', 'test/' + fname.replace('.wav', '.f0')])
        analysis.main([
            'test/' + fname, '--f0', 'test/' + fname.replace('.wav', '.f0'),
            '--preproc_fs', '8000'
        ])
        analysis.main([
            'test/' + fname, '--f0_min', '75', '--f0',
            'test/' + fname.replace('.wav', '.f0')
        ])
        analysis.main([
            'test/' + fname, '--f0_max', '200', '--f0',
            'test/' + fname.replace('.wav', '.f0')
        ])
        analysis.main([
            'test/' + fname, '--f0_min', '81', '--f0_max', '220', '--f0',
            'test/' + fname.replace('.wav', '.f0')
        ])

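        # Round-trip the F0 track: read binary float32, write one value per
        # line for the --inf0txt tests below.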
        f0s = np.fromfile('test/' + fname.replace('.wav', '.f0'),
                          dtype=np.float32)
        f0s = f0s.reshape((-1, 1))
        np.savetxt('test/' + fname.replace('.wav', '.f0txt'), f0s)

        analysis.main([
            'test/' + fname, '--inf0txt',
            'test/' + fname.replace('.wav', '.f0txt'), '--spec',
            'test/' + fname.replace('.wav', '.spec')
        ])
        analysis.main([
            'test/' + fname, '--inf0bin',
            'test/' + fname.replace('.wav', '.f0'), '--spec',
            'test/' + fname.replace('.wav', '.spec')
        ])
        analysis.main([
            'test/' + fname, '--f0_log', '--f0',
            'test/' + fname.replace('.wav', '.lf0')
        ])
        analysis.main([
            'test/' + fname, '--spec',
            'test/' + fname.replace('.wav', '.spec')
        ])
        # analysis.main(['test/'+fname, ' --spec_mceporder', '59', '--spec', 'test/'+fname.replace('.wav','.mcep')]) # Need SPTK for this one
        analysis.main([
            'test/' + fname, '--spec_nbfwbnds', '65', '--spec',
            'test/' + fname.replace('.wav', '.fwlspec')
        ])
        analysis.main([
            'test/' + fname, '--pdd', 'test/' + fname.replace('.wav', '.pdd')
        ])
        # analysis.main(['test/'+fname, '--pdd_mceporder', '60', '--pdd', 'test/'+fname.replace('.wav','.pdd')])  # Need SPTK for this one
        analysis.main(
            ['test/' + fname, '--nm', 'test/' + fname.replace('.wav', '.nm')])
        analysis.main([
            'test/' + fname, '--nm_nbfwbnds', '33', '--nm',
            'test/' + fname.replace('.wav', '.fwnm')
        ])

        # Test pre-processing
        analysis.main([
            'test/' + fname, '--inf0txt',
            'test/' + fname.replace('.wav', '.f0txt'), '--spec',
            'test/' + fname.replace('.wav', '.spec_resample16kHz'),
            '--preproc_fs', '16000'
        ])
        analysis.main([
            'test/' + fname, '--inf0txt',
            'test/' + fname.replace('.wav', '.f0txt'), '--spec',
            'test/' + fname.replace('.wav', '.spec_preproc_hp'),
            '--preproc_hp', '100.0'
        ])
Example #22
def main():
    options, args = loadOptions()
    # Train & Develop Model
    s1models = langMap(lambda l: {})
    totalCount = langMap()
    prob = totalCount
    train(s1models, totalCount)
    if options.stage == 2:
        s2models = trainFreqWords(options.N)

    # Run Model on Training Set
    predictions = []
    testFile = "training.txt"
    with open(testFile) as f:
        for line in f:
            line = line.split("\t", 1)[1]
            if options.stage == 2:
                prediction = predict2(line, s1models, s2models,
                                      includetl=not options.notag)
            else:
                prediction = predict(line, s1models, prob)
            predictions.append(prediction[0][0])
    with open(testFile + ".out", "w") as f:
        f.write("\n".join(predictions))
    analysis.main(testFile, ignoretl=options.notag or not options.test)

    # Run Model on Development Set
    predictions = []
    testFile = "test.txt" if options.test else "dev.txt"
    with open(testFile) as f:
        for line in f.readlines():
            key, line = line.split("\t", 1)
            if options.stage == 2:
                prediction = predict2(line, s1models, s2models,
                        includetl=not options.notag)
            else:
                prediction = predict(line, s1models, prob)
            if options.verbose:
                print("PREDICTION:", prediction)
                print("LINE: " + line)
            predictions.append(prediction[0][0])

    with open(testFile + ".out", "w") as f:
        f.write("\n".join(predictions))

    print("Check " + testFile + ".out for the prediction results.")

    # Calculate the Precision and Recall
    analysis.main(testFile, ignoretl=options.notag or not options.test)

    if options.interactive:
        while True:
            try:
                line = input("Line to parse (or Ctrl-D to shut down): ")
            except EOFError:
                print("\nShutting Down...")
                break
            if options.stage == 2:
                prediction = predict2(line, s1models, s2models,
                        includetl= not options.notag)
            else:
                prediction = predict(line, s1models, prob)
            sum_prob = sum([p[1] for p in prediction])
            for l, p in prediction:
                print('  %s : %.2f%%' % (l, p * 100 / sum_prob))
Example #23
    def test_smoke_cmd_synthesis(cls):
        fname = filenames[filename_totest]  # Just with one file for smoke test

        import analysis
        import synthesis

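        # Each analysis call below writes feature files (--f0, --spec, --nm, --pdd)
        # that the paired synthesis call reads back to resynthesize the waveform.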
        analysis.main([
            'test/' + fname, '--f0_min', '75', '--f0_max', '500', '--f0',
            'test/' + fname.replace('.wav', '.f0'), '--spec',
            'test/' + fname.replace('.wav', '.spec'), '--pdd',
            'test/' + fname.replace('.wav', '.pdd')
        ])
        synthesis.main([
            'test/' + fname.replace('.wav', '.resynth.wav'), '--fs', '16000',
            '--f0', 'test/' + fname.replace('.wav', '.f0'), '--spec',
            'test/' + fname.replace('.wav', '.spec'), '--pdd',
            'test/' + fname.replace('.wav', '.pdd')
        ])

        analysis.main([
            'test/' + fname, '--f0_min', '75', '--f0_max', '500', '--f0',
            'test/' + fname.replace('.wav', '.f0'), '--spec',
            'test/' + fname.replace('.wav', '.spec'), '--nm',
            'test/' + fname.replace('.wav', '.nm')
        ])
        synthesis.main([
            'test/' + fname.replace('.wav', '.resynth.wav'), '--fs', '16000',
            '--f0', 'test/' + fname.replace('.wav', '.f0'), '--spec',
            'test/' + fname.replace('.wav', '.spec')
        ])
        synthesis.main([
            'test/' + fname.replace('.wav', '.resynth.wav'), '--fs', '16000',
            '--f0', 'test/' + fname.replace('.wav', '.f0'), '--spec',
            'test/' + fname.replace('.wav', '.spec'), '--nm',
            'test/' + fname.replace('.wav', '.nm')
        ])

        analysis.main([
            'test/' + fname, '--f0_min', '75', '--f0_max', '200', '--f0_log',
            '--f0', 'test/' + fname.replace('.wav', '.lf0'), '--spec',
            'test/' + fname.replace('.wav', '.spec')
        ])
        synthesis.main([
            'test/' + fname.replace('.wav', '.resynth.wav'), '--fs', '16000',
            '--logf0', 'test/' + fname.replace('.wav', '.lf0'), '--spec',
            'test/' + fname.replace('.wav', '.spec')
        ])

        analysis.main([
            'test/' + fname, '--f0_min', '75', '--f0_max', '500', '--f0',
            'test/' + fname.replace('.wav', '.f0'), '--spec_nblinlogbnds',
            '129', '--spec', 'test/' + fname.replace('.wav', '.lspec')
        ])
        synthesis.main([
            'test/' + fname.replace('.wav', '.resynth.wav'), '--fs', '16000',
            '--logf0', 'test/' + fname.replace('.wav', '.lf0'), '--lspec',
            'test/' + fname.replace('.wav', '.lspec')
        ])

        analysis.main([
            'test/' + fname, '--f0_min', '75', '--f0_max', '500', '--f0',
            'test/' + fname.replace('.wav', '.f0'), '--spec_fwceporder', '59',
            '--spec', 'test/' + fname.replace('.wav', '.fwcep'),
            '--nm_nbfwbnds', '33', '--nm',
            'test/' + fname.replace('.wav', '.fwnm')
        ])
        synthesis.main([
            'test/' + fname.replace('.wav', '.resynth.wav'), '--fs', '16000',
            '--logf0', 'test/' + fname.replace('.wav', '.lf0'), '--fwcep',
            'test/' + fname.replace('.wav', '.fwcep'), '--fwnm',
            'test/' + fname.replace('.wav', '.fwnm')
        ])

        # This one is the most used and thus should be the last one
        analysis.main([
            'test/' + fname, '--f0_log', '--f0',
            'test/' + fname.replace('.wav', '.lf0'), '--spec_nbfwbnds', '65',
            '--spec', 'test/' + fname.replace('.wav', '.fwlspec'),
            '--nm_nbfwbnds', '33', '--nm',
            'test/' + fname.replace('.wav', '.fwnm')
        ])
        synthesis.main([
            'test/' + fname.replace('.wav', '.resynth.wav'), '--fs', '16000',
            '--logf0', 'test/' + fname.replace('.wav', '.lf0'), '--fwlspec',
            'test/' + fname.replace('.wav', '.fwlspec'), '--fwnm',
            'test/' + fname.replace('.wav', '.fwnm')
        ])
Example #24
def main(path, orf_name, yeast_fname, is_annotated, is_aligned, align_pairwise,
         **kwargs):
    #algorithm=kwargs.pop('algorithm','mafft')
    print(orf_name)
    #    start = 2754
    #    end = 2918

    #        path = 'data/pgs/YLL059C_2/'
    #        orf_name = 'YLL059C'
    #        yeast_fname = 'data/orf_genomic_all.fasta'
    #        is_annotated = True
    if is_aligned:
        filename = [s for s in os.listdir(path) if 'muscle.fa' in s][0]
    else:
        filename = [s for s in os.listdir(path) if '_alignment.fa' in s][0]
    # mcl = MuscleCommandline(input='data/ybr_deneme/YBR196C-A_alignment.fa',out = 'data/ybr_deneme/YBR196C-A_alignment_muscle.fa')
    # find_best_overlap_id('data/ybr_deneme/Spar')
    aln = SeqIO.parse(path + '/' + filename, 'fasta')
    maxlen = 0
    for rec in aln:
        l = len(rec.seq)
        if l > maxlen:
            maxlen = l
    #    if maxlen>100000:
    #        return 0
    orf_seq = None
    if is_annotated:
        yeast = SeqIO.parse(yeast_fname, 'fasta')
        for record in yeast:
            if record.id == orf_name:
                orf_seq = record.seq
        if orf_seq is None:
            print(orf_name + ' is not found in ' + yeast_fname)
            return 0
    else:
        yeast = SeqIO.parse(yeast_fname, 'fasta')
        for record in yeast:
            orf_seq = record.seq
    if align_pairwise:
        msa_file = list(SeqIO.parse(path + '/' + filename, 'fasta'))
        ref_seq_record = [rec for rec in msa_file if rec.id == 'Scer'][0]

        for record in msa_file:
            if record.id == 'Scer' or len(record.seq) == 0:
                continue

            start, end = get_subalignment([ref_seq_record, record],
                                          str(orf_seq),
                                          path,
                                          orf_name,
                                          is_aligned=is_aligned,
                                          **kwargs)
            aln_file_name = [
                s for s in os.listdir(path)
                if '_subalignment_extended_' + record.id in s
            ][0]
            align = AlignIO.read(path + '/' + aln_file_name, 'fasta')
            try:
                ref_seq_id = [
                    i for i, rec in enumerate(align) if rec.id == 'Scer'
                ][0]
            except IndexError:
                print('Reference sequence name is not in the alignment')
                continue  # skip this record; ref_seq_id is undefined here

            find_homologs(align=align,
                          ref_seq_id=ref_seq_id,
                          ref_range=[start, end],
                          orf_name=orf_name,
                          out_path=path,
                          **kwargs)
    else:
        start, end = get_subalignment(path + '/' + filename,
                                      str(orf_seq),
                                      path,
                                      orf_name,
                                      is_aligned=is_aligned,
                                      **kwargs)
        aln_file_name = [
            s for s in os.listdir(path) if '_alignment_muscle' in s
        ][0]
        align = AlignIO.read(path + '/' + aln_file_name, 'fasta')
        try:
            ref_seq_id = [
                i for i, rec in enumerate(align) if rec.id == 'Scer'
            ][0]
        except IndexError:
            print('Reference sequence name is not in the alignment')
            return 0  # ref_seq_id is undefined; cannot continue

        find_homologs(align=align,
                      ref_seq_id=ref_seq_id,
                      ref_range=[start, end],
                      orf_name=orf_name,
                      out_path=path,
                      **kwargs)
        # ss = []
    analysis.main(path, orf_name, yeast_fname, is_annotated, align_pairwise)
Example #25
def main():
    options, args = loadOptions()
    # Train & Develop Model
    s1models = langMap(lambda l: {})
    totalCount = langMap()
    prob = totalCount
    train(s1models, totalCount)
    if options.stage == 2:
        s2models = trainFreqWords(options.N)

    # Run Model on Training Set
    predictions = []
    testFile = "training.txt"
    with open(testFile) as f:
        for line in f:
            line = line.split("\t", 1)[1]
            if options.stage == 2:
                prediction = predict2(line,
                                      s1models,
                                      s2models,
                                      includetl=not options.notag)
            else:
                prediction = predict(line, s1models, prob)
            predictions.append(prediction[0][0])
    with open(testFile + ".out", "w") as f:
        f.write("\n".join(predictions))
    analysis.main(testFile, ignoretl=options.notag or not options.test)

    # Run Model on Development Set
    predictions = []
    testFile = "test.txt" if options.test else "dev.txt"
    with open(testFile) as f:
        for line in f.readlines():
            key, line = line.split("\t", 1)
            if options.stage == 2:
                prediction = predict2(line,
                                      s1models,
                                      s2models,
                                      includetl=not options.notag)
            else:
                prediction = predict(line, s1models, prob)
            if options.verbose:
                print("PREDICTION:", prediction)
                print("LINE: " + line)
            predictions.append(prediction[0][0])

    with open(testFile + ".out", "w") as f:
        f.write("\n".join(predictions))

    print("Check " + testFile + ".out for the prediction results.")

    # Calculate the Precision and Recall
    analysis.main(testFile, ignoretl=options.notag or not options.test)

    if options.interactive:
        while True:
            try:
                line = input("Line to parse (or Ctrl-D to shut down): ")
            except EOFError:
                print("\nShutting Down...")
                break
            if options.stage == 2:
                prediction = predict2(line,
                                      s1models,
                                      s2models,
                                      includetl=not options.notag)
            else:
                prediction = predict(line, s1models, prob)
            sum_prob = sum([p[1] for p in prediction])
            for l, p in prediction:
                print('  %s : %.2f%%' % (l, p * 100 / sum_prob))
Example #26
for gprotein in gprotein_list:
    # print gprotein + '... ',
    for files in os.listdir('.'):
        if gprotein in files and 'fweight' in files:
            features = extract_features(files)
            # print features

            # Load the positive and negative HMM profiles for this G-protein.
            hmm_pos, hmm_pos_positions = load_functions.read_hmm(path + '/data/hmm_models/' + gprotein + '_pos.hmm')
            hmm_neg, hmm_neg_positions = load_functions.read_hmm(path + '/data/hmm_models/' + gprotein + '_neg.hmm')

            pos = read_gprotein_hmm_out(path + '/temp/' + gprotein + '_pos.out', hmm_pos_positions)
            neg = read_gprotein_hmm_out(path + '/temp/' + gprotein + '_neg.out', hmm_neg_positions)

            if hack_directory is not None:
                # if gprotein == 'GNA12':
                l = analysis.main(path, pos, neg, hmm_pos, hmm_neg, features, gprotein, obj.keys(), obj)
                open(hack_directory + '/' + str(gprotein) + '.txt', 'w').write(l)
                # sys.exit()

            # Build the header row for the feature table.
            l = 'GPCR\t'
            for f in features:
                l += f + '\t'
            l += '\n'
            data = read_aln(pos, neg, hmm_pos, hmm_neg, l, features, gprotein)
            # if gprotein == 'GNAI3':
            #     print data

            feature_matrix = data[:, 2:]
            model = extract_model(gprotein)

            min_max = k_fold(path + '/data/feature_files/' + str(gprotein) + '_train.txt')