Example #1
0
 #### 3. Building the two Markov Models ####
 # The pseudocount and plain paths run the same steps and emit the same
 # progress messages; only the builder taken from `m` differs.
 sys.stderr.write("Second Step: Generating the Markov Models\n")
 if args.pseudocounts:
     build_model = m.build_hash_pseudocount
 else:
     build_model = m.build_hash
 # Each separation result is materialised into a list before being handed
 # to the builder together with the requested order.
 back_dict = build_model(list(m.background_separation(training_filename)), args.order)
 sys.stderr.write("The MM for the background DONE!\n")
 sign_dict = build_model(list(m.signal_separation(training_filename)), args.order)
 sys.stderr.write("The MM for the foreground DONE!\n\n")
 #### 4. printing the MM in a file ####
 # The model path is the output prefix plus a ".MM" suffix.
 output_mm = args.outfile + '.MM'
 sys.stderr.write("Printing the model in {}\n".format(output_mm))
 m.print_hash(sign_dict, back_dict, output_mm)
 #### 5. Evaluation of the model ####
 # Announce the evaluation phase; the boundaries mode below runs only when
 # args.boundaries is truthy.
 sys.stderr.write("Third Step, Evaluation of the model\n")
 if (args.boundaries):
     # NOTE(review): the message advertises a -40..40 range, but the list
     # built below is sparse: -40, the integers -10 through 9, then 40.
     sys.stderr.write("Boundaries evaluation have been selected.\nThe threshold range goes from -40 to 40\n")
     # Results go to "<outfile>.data". The handle is opened without a
     # context manager; where it is closed is not visible in this excerpt.
     output_filename= args.outfile+'.data'
     output=open(output_filename, "w")
     # range(-10,10,1) excludes its upper bound, so 10 itself is not visited.
     list_t=[-40]+[x for x in range(-10,10,1)]+[40]
     for i in list_t:
         # Log the current threshold value.
         sys.stderr.write("{}\n".format(i))
         # Start every threshold from zeroed values. Presumably these are
         # rates (TPR/FPR/PPV) and confusion-matrix counts (TP/FP/TN);
         # confirm against the rest of the loop, which is not shown here.
         TPR=0
         FPR=0
         PPV=0
         TP=0
         FP=0
         TN=0
Example #2
0

# Build the background and signal (foreground) tables from the training
# file, using the pseudocount builder when --pseudocounts was requested.
# Progress messages are only emitted on the pseudocount path.
if args.pseudocounts:
    sys.stderr.write("Starting the training background and foreground separation...\n\n")
    back_dict = m.build_hash_pseudocount(list(m.background_separation(training_filename)), args.order)
    sys.stderr.write("The MM for the bg is generated...\n\n")
    sign_dict = m.build_hash_pseudocount(list(m.signal_separation(training_filename)), args.order)
    # This message follows the signal table, so it reports the foreground
    # model; it used to repeat the background ("bg") message verbatim.
    sys.stderr.write("The MM for the fg is generated...\n\n")
else:
    back_dict = m.build_hash(list(m.background_separation(training_filename)), args.order)
    sign_dict = m.build_hash(list(m.signal_separation(training_filename)), args.order)

######### 4 print ########
# Pass both tables to m.print_hash together with the "<outfile>.MM" path.
output_filename = args.outfile + '.MM'
sys.stderr.write("Printing the model in {}...\n\n".format(output_filename))
m.print_hash(sign_dict, back_dict, output_filename)

######## 5 windows #######
sys.stderr.write("Starting the testing background and foreground separation...\n\n")
sys.stderr.write("Starting the computation of the Scores...\n\n")

# The four score files share the output prefix and differ only by suffix.
(
    output_filename_fg,
    output_filename_bg,
    output_filename_all_top,
    output_filename_all_bot,
) = [
    args.outfile + suffix
    for suffix in ('_fg.th', '_bg.th', '_all_top.th', '_all_bot.th')
]

# Open them for writing in the same fg, bg, all_top, all_bot order.
out_fg, out_bg, output_all_top, output_all_bot = [
    open(score_path, "w")
    for score_path in (
        output_filename_fg,
        output_filename_bg,
        output_filename_all_top,
        output_filename_all_bot,
    )
]

sys.stderr.write("Printing the background scores at {}...\n\n".format(output_filename_bg))
                    action='store_true',
                    default=False,
                    help="Add pseudocounts in the generated MM"
                    )
# Integer order of the Markov Model, stored on args.order (defaults to 3).
parser.add_argument(
    '-k', '--order',
    dest='order',
    action='store',
    type=int,
    default=3,
    help='It sets the order of the Markov Model used.',
)
args = parser.parse_args()

######## 1 TT SETS ######
# Both set filenames are the input name with a fixed prefix; the split
# itself is delegated to the helper module.
training_filename = 'training_' + args.infile
testing_filename = 'testing_' + args.infile
m.training_testing_sets_separation(args.infile, 3, training_filename, testing_filename)

######## 2 +/- ###########
background, signal = m.background_signal_separation(training_filename)

######## 3 kmer dict #####
# Pick the table builder once; both groups go through the same one.
if args.pseudocounts:
    build_model = m.build_hash_pseudocount
else:
    build_model = m.build_hash
back_dict = build_model(background, args.order)
sign_dict = build_model(signal, args.order)

######### 4 print ########

m.print_hash(sign_dict, back_dict, args.outfile)

# Stop here; nothing after this point in the file is executed.
sys.exit()