Python main Examples

Programming Language: Python

Namespace/Package Name: smtp

Method/Function: main

Examples at hotexamples.com: 7

Python main - 7 examples found. These are the top-rated real-world Python examples of smtp.main extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: parse_pairwise.py Project: gsethi/regulome-explorer

def process_pairwise_edges(dataset_label, matrixfile, pairwised_file, pvlambda, config, results_path, do_pubcrawl, contacts, keep_unmapped, featureInterestingFile):
    """
    Convert a pairwise association file into the edge TSV, then run the MySQL and Solr load scripts.

    Edges are undirected: once A->B has been accepted, both a second A->B and a
    B->A line are reported as duplicates and skipped.

    Each input line is tab delimited and needs at least 11 columns, read as:
    nodeA, nodeB, correlation, numna, pvalue, bonf, corrected pvalue,
    numna feature1, pvalue feature1, numna feature2, pvalue feature2.
    An optional 12th column is used as the integer link distance.

    Parameters:
        dataset_label -- used in every output file name and in the target table name
        matrixfile -- not used by this function
        pairwised_file -- path of the pairwise association file to read
        pvlambda -- callable applied to every parsed p-value (as float)
        config -- handed to the db_util getters for schema/user/password/host/port/solr path
        results_path -- prefix for all output files; it is concatenated as-is, so it
            must already end with a path separator
        do_pubcrawl -- when equal to "yes", each accepted line is also passed to
            getPairwiseInfo.processLine and a notification is sent through smtp.main
        contacts -- recipient string forwarded to smtp.main
        keep_unmapped -- when equal to 0, associations flagged by
            db_util.isUnmappedAssociation are diverted to the *_pw_unmapped.tsv file
        featureInterestingFile -- handed to parse_features_rfex.get_feature_interest_hash

    Returns None. Side effects: writes the TSV/JSON/shell files under results_path
    and executes both shell scripts with os.system.
    """
    mydb = db_util.getDBSchema(config)
    myuser = db_util.getDBUser(config)
    mypw = db_util.getDBPassword(config)
    myhost = db_util.getDBHost(config)
    myport = db_util.getDBPort(config)
    mysolr = db_util.getSolrPath(config)
    fIntHash = parse_features_rfex.get_feature_interest_hash(featureInterestingFile)
    edge_table = mydb + ".mv_" + dataset_label + "_feature_networks"

    out_prefix = results_path + 'edges_out_' + dataset_label
    edges_re_path = out_prefix + '_pw_re.tsv'
    edges_pc_path = out_prefix + '_pw_pc.tsv'
    meta_path = out_prefix + '_meta.json'
    unmappedPath = out_prefix + '_pw_unmapped.tsv'
    efsh_path = results_path + 'load_edges_' + dataset_label + '.sh'
    solrsh_path = results_path + 'load_solr_' + dataset_label + '.sh'

    # Feature rows keyed by their second column; the first column is the feature id.
    features_hash = {}
    with open(results_path + dataset_label + '_features_out.tsv', 'r') as features_file:
        for fl in features_file:
            ftk = fl.strip().split("\t")
            features_hash[ftk[1]] = ftk

    edges_hash = {}
    max_pv = -1000.0
    max_pv_corr = -1000.0
    validEdgeId = 1
    dupeEdges = 0
    totalEdges = 0
    cnan = 0
    pcc = 0
    unMapped = 0

    with open(pairwised_file) as edges_file, \
            open(edges_re_path, 'w') as edges_out_re, \
            open(edges_pc_path, 'w') as edges_out_pc, \
            open(unmappedPath, 'w') as unmappedout:
        for line in edges_file:
            totalEdges += 1
            line = line.strip()
            tokens = line.split('\t')
            if len(tokens) < 11:
                if validEdgeId == 1:
                    # No edge accepted yet: a short line is assumed to be the header.
                    print("Skipping header/line 1 for insufficient token reasons")
                else:
                    print("ERROR: requires 11 tokens, found:" + str(len(tokens)) + " Skipping line\n" + line)
                continue
            nodeA = tokens[0]
            nodeB = tokens[1]

            try:
                f1genescore = fIntHash[nodeA]
            except KeyError:
                f1genescore = 0
            try:
                f2genescore = fIntHash[nodeB]
            except KeyError:
                f2genescore = 0

            if db_util.isUnmappedAssociation(nodeA, nodeB) and keep_unmapped == 0:
                unmappedout.write(nodeA + "\t" + nodeB + "\n")
                unMapped += 1
                continue

            if nodeA not in features_hash:
                print("key error in resolving featureId for " + nodeA + " skipping edge.")
                continue
            if nodeB not in features_hash:
                print("key error in resolving featureId for " + nodeB + " skipping edge.")
                continue
            if not (features_hash[nodeA] and features_hash[nodeB]):
                print("invalid edge nodeA and nodeB not in features:" + nodeA + "_" + nodeB)
                continue

            # Direction does not matter, so the reversed key counts as a duplicate too.
            if (nodeA + "_" + nodeB) in edges_hash or (nodeB + "_" + nodeA) in edges_hash:
                print("duplicated edge:" + nodeA + "_" + nodeB)
                dupeEdges += 1
                continue

            feature1id = str(features_hash[nodeA][0])
            feature2id = str(features_hash[nodeB][0])

            # NOTE: the edge is registered before the p-values are parsed, so a line
            # dropped further down for a bad p-value still counts as a valid edge.
            edges_hash[nodeA + "_" + nodeB] = validEdgeId
            validEdgeId += 1

            dataA = process_feature_alias(nodeA)
            dataB = process_feature_alias(nodeB)
            # A 7-field alias gets an empty 8th field and a trailing ':' on the node name.
            if len(dataA) == 7:
                dataA.append("")
                nodeA = nodeA + ":"
            if len(dataB) == 7:
                dataB.append("")
                nodeB = nodeB + ":"

            correlation_str = tokens[2]
            try:
                correlation = float(correlation_str)
            except ValueError:
                # Unparseable correlation: treat as NaN and write an empty column.
                cnan += 1
                correlation = float('nan')
                correlation_str = ''
            numna = tokens[3]
            bonf = tokens[5]
            numnaf1 = tokens[7]
            numnaf2 = tokens[9]
            try:
                pv = str(pvlambda(float(tokens[4])))
                pv_bonf = str(pvlambda(float(tokens[6])))
                pvf1 = str(pvlambda(float(tokens[8])))
                pvf2 = str(pvlambda(float(tokens[10])))
            except ValueError:
                # error in pairwise script, ignore these associations for now
                continue

            max_pv = max(max_pv, float(pv))
            max_pv_corr = max(max_pv_corr, float(pv_bonf))

            rho = str(db_util.sign(correlation) * abs(float(pv)))

            link_distance = 500000000
            if len(tokens) >= 12:
                link_distance = int(tokens[11])
            elif (len(dataA) >= 5 and len(dataB) >= 5
                    and db_util.is_numeric(dataA[4]) >= 1
                    and db_util.is_numeric(dataB[4]) >= 1
                    and dataA[3] == dataB[3]):
                # Fields 3 match and fields 4 are numeric: use their absolute difference.
                link_distance = abs(int(dataB[4]) - int(dataA[4]))

            columns = [
                feature1id, feature2id,
                nodeA, "\t".join(dataA),
                nodeB, "\t".join(dataB),
                correlation_str, numna, pv, bonf, pv_bonf,
                numnaf1, pvf1, numnaf2, pvf2, rho,
                str(link_distance), str(f1genescore), str(f2genescore),
            ]
            edges_out_re.write("\t".join(columns) + "\n")
            if do_pubcrawl == "yes":
                getPairwiseInfo.processLine(line, edges_out_pc)
                pcc += 1

    print("Report: Valid Edges %i Duped %i cNAN %i \nunMapped %i Saved to %s \nTotal %i max_pvalue %f max_pvalue_corr %f" % (validEdgeId - 1, dupeEdges, cnan, unMapped, unmappedPath, totalEdges, max_pv, max_pv_corr))
    with open(meta_path, 'w') as edges_meta_json:
        edges_meta_json.write('{"max_logpv":%f}' % (max_pv))

    _write_mysql_load_script(efsh_path, myhost, myport, myuser, mypw, mydb, edge_table, edges_re_path)
    print("Begin pairwise db bulk upload " + time.ctime())
    os.system("sh " + efsh_path)

    # create sharded association files for solr import
    _write_solr_load_script(solrsh_path, mysolr, edges_re_path, dataset_label)
    print("Begin pairwise solr upload " + time.ctime())
    os.system("sh " + solrsh_path)

    if do_pubcrawl == "yes":
        print("senting Pubcrawl notification to " + contacts)
        smtp.main("*****@*****.**", contacts, "Notification - New Pairwise Associations for PubCrawl", "New pairwise associations ready for PubCrawl load\n" + edges_pc_path + "\n\n" + str(pcc) + " Total Edges\n\n" + edges_re_path + " loaded into RegulomeExplorer, dataset label is " + dataset_label + " \n\n")


def _write_mysql_load_script(script_path, host, port, user, password, schema, table, tsv_path):
    """Write the bash script that bulk-loads tsv_path into table through the mysql client."""
    # NOTE(review): the password ends up in plain text in the script and on the
    # mysql command line; kept as-is because callers rely on the generated file.
    with open(script_path, 'w') as script:
        script.write("#!/bin/bash\n")
        script.write("mysql -h %s --port %s --user=%s --password=%s --database=%s<<EOFMYSQL\n" % (host, port, user, password, schema))
        script.write("load data local infile '" + tsv_path + "' replace INTO TABLE " + table + " fields terminated by '\\t' LINES TERMINATED BY '\\n';\n")
        script.write("\ncommit;")
        script.write("\nEOFMYSQL")


def _write_solr_load_script(script_path, solr_url, tsv_path, dataset_label, core_count=8):
    """Write the bash script that shards tsv_path and reloads the dataset into each Solr core."""
    with open(script_path, 'w') as script:
        script.write("#!/bin/bash\n")
        script.write("python createPWShardedDataset.py " + tsv_path + " " + dataset_label + "\n")
        # First delete the dataset's existing documents from every core ...
        for core in range(core_count):
            script.write("curl '" + solr_url + "/core" + str(core) + "/update/?commit=true' -H 'Content-type:text/xml' --data-binary '<delete><query>dataset:\"" + dataset_label + "\"</query></delete>'\n")
        # ... then post each core's shard file in the background.
        for core in range(core_count):
            script.write("curl '" + solr_url + "/core" + str(core) + "/update/csv?commit=true&separator=%09&overwrite=false&escape=\\ ' --data-binary @" + tsv_path + "_core" + str(core) + "_final.tsv -H 'Content-type:text/plain;charset=utf-8' &\n")

Example #2

Show file

File: main.py Project: procamora/Testeador-de-Red

def Email(adjuntos):
    """Call ``smtp.main`` with ``adjuntos`` and then pop up a confirmation box.

    Args:
        adjuntos: forwarded unchanged to ``smtp.main``. The name is Spanish for
            "attachments" -- presumably the files to attach; confirm against
            ``smtp.main``.
    """
    smtp.main(adjuntos)
    # 'Mensaje Enviado' is Spanish for "Message sent".
    gui.msgbox('Mensaje Enviado')

Example #3

Show file

File: train_ce.py Project: Tomasyao77/age-estimation-pytorch

def main(mydict):
    """Run one training session and e-mail a summary of it through ``smtp.main``.

    Args:
        mydict: mapping that must provide
            "data_dir"    -- dataset root handed to ``FaceDataset_ceface``;
            "tensorboard" -- base directory for the SummaryWriter logs, or
                             ``None`` to disable them;
            "checkpoint"  -- directory (created if missing) for saved checkpoints;
            "ifSE"        -- forwarded to ``my_model``;
            "l1loss"      -- truthy to pass 0.1 (otherwise 0.0) as the ``l1loss``
                             argument of ``train`` and ``validate``.

    Returns:
        The lowest validation MAE observed over the epochs that were run
        (10000.0 if no epoch ran).
    """
    print("开始训练时间：")
    start_time = time.strftime('%Y-%m-%d %H:%M:%S',
                               time.localtime(time.time()))
    print(start_time)
    # extra command-line arguments of the script
    args = get_args()
    # parameters passed into main()
    my_data_dir = mydict["data_dir"]
    my_tensorboard = mydict["tensorboard"]
    my_checkpoint = mydict["checkpoint"]
    my_ifSE = mydict["ifSE"]
    l1loss = 0.1 if mydict["l1loss"] else 0.0

    if args.opts:
        cfg.merge_from_list(args.opts)

    cfg.freeze()
    start_epoch = 0

    checkpoint_dir = Path(my_checkpoint)
    checkpoint_dir.mkdir(parents=True, exist_ok=True)

    # create model_dir
    print("=> creating model_dir '{}'".format("se_resnext50_32x4d"))
    model = my_model(my_ifSE)

    if cfg.TRAIN.OPT == "sgd":
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=cfg.TRAIN.LR,
                                    momentum=cfg.TRAIN.MOMENTUM,
                                    weight_decay=cfg.TRAIN.WEIGHT_DECAY)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=cfg.TRAIN.LR)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    # optionally resume from a checkpoint
    resume_path = args.resume
    if resume_path:
        print(Path(resume_path).is_file())
        if Path(resume_path).is_file():
            print("=> loading checkpoint '{}'".format(resume_path))
            checkpoint = torch.load(resume_path, map_location="cpu")
            start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                resume_path, checkpoint['epoch']))
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        else:
            print("=> no checkpoint found at '{}'".format(resume_path))

    if args.multi_gpu:
        model = nn.DataParallel(model)

    if device == "cuda":
        cudnn.benchmark = True

    # loss criterion
    criterion = nn.CrossEntropyLoss().to(device)
    train_dataset = FaceDataset_ceface(my_data_dir,
                                       "train",
                                       img_size=cfg.MODEL.IMG_SIZE,
                                       augment=True,
                                       age_stddev=cfg.TRAIN.AGE_STDDEV)
    train_loader = DataLoader(train_dataset,
                              batch_size=cfg.BATCH_SIZE,
                              shuffle=True,
                              num_workers=cfg.TRAIN.WORKERS,
                              drop_last=True)

    val_dataset = FaceDataset_ceface(my_data_dir,
                                     "valid",
                                     img_size=cfg.MODEL.IMG_SIZE,
                                     augment=False)
    val_loader = DataLoader(val_dataset,
                            batch_size=cfg.BATCH_SIZE,
                            shuffle=False,
                            num_workers=cfg.TRAIN.WORKERS,
                            drop_last=False)

    # last_epoch lets a resumed run continue the decay schedule from start_epoch
    scheduler = StepLR(optimizer,
                       step_size=cfg.TRAIN.LR_DECAY_STEP,
                       gamma=cfg.TRAIN.LR_DECAY_RATE,
                       last_epoch=start_epoch - 1)
    best_val_mae = 10000.0
    train_writer = None
    val_writer = None
    val_mae_list = []
    train_loss_list = []
    val_loss_list = []

    if my_tensorboard is not None:
        # args.opts may be empty/None when no overrides were given
        opts_prefix = "_".join(args.opts or [])
        train_writer = SummaryWriter(log_dir=my_tensorboard + "/" +
                                     opts_prefix + "_train")
        val_writer = SummaryWriter(log_dir=my_tensorboard + "/" + opts_prefix +
                                   "_val")

    try:
        # NOTE(review): the epoch limit is hard-coded to 80 instead of
        # cfg.TRAIN.EPOCHS, while the e-mail below reports cfg.TRAIN.EPOCHS.
        for epoch in range(start_epoch, 80):
            # train
            train_loss, train_acc = train(train_loader, model, criterion,
                                          optimizer, epoch, device, l1loss)
            train_loss_list.append(train_loss)

            # validate
            val_loss, val_acc, val_mae = validate(val_loader, model, criterion,
                                                  epoch, device, l1loss)
            val_mae_list.append(val_mae)
            val_loss_list.append(val_loss)

            if train_writer is not None:
                train_writer.add_scalar("loss", train_loss, epoch)
                train_writer.add_scalar("acc", train_acc, epoch)
                val_writer.add_scalar("loss", val_loss, epoch)
                val_writer.add_scalar("acc", val_mae if False else val_acc, epoch)
                val_writer.add_scalar("mae", val_mae, epoch)

            # Track the real minimum. The previous condition
            # (`val_mae < best_val_mae or val_mae > 0`) was true for every
            # positive MAE, so "best" was simply the last epoch's value.
            if val_mae < best_val_mae:
                print(
                    f"=> [epoch {epoch:03d}] best val mae was improved from {best_val_mae:.3f} to {val_mae:.3f}"
                )
                best_val_mae = val_mae
            else:
                print(
                    f"=> [epoch {epoch:03d}] best val mae was not improved from {best_val_mae:.3f} ({val_mae:.3f})"
                )

            # Checkpoint every epoch whose MAE is below 4.0, improved or not
            # (this is what the old always-true condition effectively did).
            if val_mae < 4.0:
                model_state_dict = model.module.state_dict(
                ) if args.multi_gpu else model.state_dict()
                torch.save(
                    {
                        'epoch': epoch + 1,
                        'arch': cfg.MODEL.ARCH,
                        'state_dict': model_state_dict,
                        'optimizer_state_dict': optimizer.state_dict()
                    },
                    str(
                        checkpoint_dir.joinpath(
                            "epoch{:03d}_{:.5f}_{:.4f}.pth".format(
                                epoch, val_loss, val_mae))))

            # adjust learning rate
            scheduler.step()
    finally:
        # main() is called several times per process; release the log writers.
        if train_writer is not None:
            train_writer.close()
        if val_writer is not None:
            val_writer.close()

    print("=> training finished")
    print(f"additional opts: {args.opts}")
    print(f"best val mae: {best_val_mae:.3f}")
    print("结束训练时间：")
    end_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    print(end_time)
    elapsed = smtp.date_gap(start_time, end_time)
    print("训练耗时: " + elapsed)
    # send the summary e-mail
    smtp.main(
        dict_={
            "共训练epochs: ": cfg.TRAIN.EPOCHS,
            "训练耗时: ": elapsed,
            "最低val_mae: ": best_val_mae,
            "平均val_mae: ": np.array(val_mae_list).mean(),
            "vale_mae_list: ": val_mae_list,
            "train_loss_list: ": train_loss_list,
            "val_loss_list: ": val_loss_list,
            "MODEL.IMG_SIZE: ": cfg.MODEL.IMG_SIZE,
            "BATCH_SIZE: ": cfg.BATCH_SIZE,
            "LOSS.l1: ": l1loss,
            "TRAIN.LR: ": cfg.TRAIN.LR,
            "TRAIN.LR_DECAY_STEP: ": cfg.TRAIN.LR_DECAY_STEP,
            "TRAIN.LR_DECAY_RATE:": cfg.TRAIN.LR_DECAY_RATE,
            "TRAIN.OPT: ": cfg.TRAIN.OPT,
            "MODEL.ARCH:": cfg.MODEL.ARCH
        })
    return best_val_mae

Example #4

Show file

File: train_ce.py Project: Tomasyao77/age-estimation-pytorch

        "l1loss": False
    })
    time.sleep(180)
    main({
        "data_dir": data_dir,
        "tensorboard": tf_log,
        "checkpoint": ckpt,
        "ifSE": True,
        "l1loss": False
    })
    time.sleep(180)
    main({
        "data_dir": data_dir,
        "tensorboard": tf_log,
        "checkpoint": ckpt,
        "ifSE": True,
        "l1loss": True
    })
    time.sleep(180)
    main({
        "data_dir": data_dir,
        "tensorboard": tf_log,
        "checkpoint": ckpt,
        "ifSE": True,
        "l1loss": True
    })
    ###########################################################################################################
    end_time = smtp.print_time("全部训练结束!!!")
    print(smtp.date_gap(start_time, end_time))
    smtp.main(dict_={"ceface全部训练耗时: ": smtp.date_gap(start_time, end_time)})

Example #5

Show file

def process_pairwise_edges(dataset_label, matrixfile, pairwised_file, pvlambda,
                           config, results_path, do_pubcrawl, contacts,
                           keep_unmapped, featureInterestingFile):
    """
    Convert a pairwise association file into the edge TSV, then run the MySQL and Solr load scripts.

    Edges are undirected: once A->B has been accepted, both a second A->B and a
    B->A line are reported as duplicates and skipped.

    Each input line is tab delimited and needs at least 11 columns, read as:
    nodeA, nodeB, correlation, numna, pvalue, bonf, corrected pvalue,
    numna feature1, pvalue feature1, numna feature2, pvalue feature2.
    An optional 12th column is used as the integer link distance.

    Parameters:
        dataset_label -- used in every output file name and in the target table name
        matrixfile -- not used by this function
        pairwised_file -- path of the pairwise association file to read
        pvlambda -- callable applied to every parsed p-value (as float)
        config -- handed to the db_util getters for schema/user/password/host/port/solr path
        results_path -- prefix for all output files; it is concatenated as-is, so it
            must already end with a path separator
        do_pubcrawl -- when equal to "yes", each accepted line is also passed to
            getPairwiseInfo.processLine and a notification is sent through smtp.main
        contacts -- recipient string forwarded to smtp.main
        keep_unmapped -- when equal to 0, associations flagged by
            db_util.isUnmappedAssociation are diverted to the *_pw_unmapped.tsv file
        featureInterestingFile -- handed to parse_features_rfex.get_feature_interest_hash

    Returns None. Side effects: writes the TSV/JSON/shell files under results_path
    and executes both shell scripts with os.system.
    """
    mydb = db_util.getDBSchema(config)
    myuser = db_util.getDBUser(config)
    mypw = db_util.getDBPassword(config)
    myhost = db_util.getDBHost(config)
    myport = db_util.getDBPort(config)
    mysolr = db_util.getSolrPath(config)
    fIntHash = parse_features_rfex.get_feature_interest_hash(
        featureInterestingFile)
    edge_table = mydb + ".mv_" + dataset_label + "_feature_networks"

    out_prefix = results_path + 'edges_out_' + dataset_label
    edges_re_path = out_prefix + '_pw_re.tsv'
    edges_pc_path = out_prefix + '_pw_pc.tsv'
    meta_path = out_prefix + '_meta.json'
    unmappedPath = out_prefix + '_pw_unmapped.tsv'
    efsh_path = results_path + 'load_edges_' + dataset_label + '.sh'
    solrsh_path = results_path + 'load_solr_' + dataset_label + '.sh'

    # Feature rows keyed by their second column; the first column is the feature id.
    features_hash = {}
    with open(results_path + dataset_label + '_features_out.tsv',
              'r') as features_file:
        for fl in features_file:
            ftk = fl.strip().split("\t")
            features_hash[ftk[1]] = ftk

    edges_hash = {}
    max_pv = -1000.0
    max_pv_corr = -1000.0
    validEdgeId = 1
    dupeEdges = 0
    totalEdges = 0
    cnan = 0
    pcc = 0
    unMapped = 0

    with open(pairwised_file) as edges_file, \
            open(edges_re_path, 'w') as edges_out_re, \
            open(edges_pc_path, 'w') as edges_out_pc, \
            open(unmappedPath, 'w') as unmappedout:
        for line in edges_file:
            totalEdges += 1
            line = line.strip()
            tokens = line.split('\t')
            if len(tokens) < 11:
                if validEdgeId == 1:
                    # No edge accepted yet: a short line is assumed to be the header.
                    print("Skipping header/line 1 for insufficient token reasons")
                else:
                    print("ERROR: requires 11 tokens, found:" +
                          str(len(tokens)) + " Skipping line\n" + line)
                continue
            nodeA = tokens[0]
            nodeB = tokens[1]

            try:
                f1genescore = fIntHash[nodeA]
            except KeyError:
                f1genescore = 0
            try:
                f2genescore = fIntHash[nodeB]
            except KeyError:
                f2genescore = 0

            if (db_util.isUnmappedAssociation(nodeA, nodeB)
                    and keep_unmapped == 0):
                unmappedout.write(nodeA + "\t" + nodeB + "\n")
                unMapped += 1
                continue

            if nodeA not in features_hash:
                print("key error in resolving featureId for " + nodeA +
                      " skipping edge.")
                continue
            if nodeB not in features_hash:
                print("key error in resolving featureId for " + nodeB +
                      " skipping edge.")
                continue
            if not (features_hash[nodeA] and features_hash[nodeB]):
                print("invalid edge nodeA and nodeB not in features:" + nodeA +
                      "_" + nodeB)
                continue

            # Direction does not matter, so the reversed key counts as a duplicate too.
            if ((nodeA + "_" + nodeB) in edges_hash
                    or (nodeB + "_" + nodeA) in edges_hash):
                print("duplicated edge:" + nodeA + "_" + nodeB)
                dupeEdges += 1
                continue

            feature1id = str(features_hash[nodeA][0])
            feature2id = str(features_hash[nodeB][0])

            # NOTE: the edge is registered before the p-values are parsed, so a line
            # dropped further down for a bad p-value still counts as a valid edge.
            edges_hash[nodeA + "_" + nodeB] = validEdgeId
            validEdgeId += 1

            dataA = process_feature_alias(nodeA)
            dataB = process_feature_alias(nodeB)
            # A 7-field alias gets an empty 8th field and a trailing ':' on the node name.
            if len(dataA) == 7:
                dataA.append("")
                nodeA = nodeA + ":"
            if len(dataB) == 7:
                dataB.append("")
                nodeB = nodeB + ":"

            correlation_str = tokens[2]
            try:
                correlation = float(correlation_str)
            except ValueError:
                # Unparseable correlation: treat as NaN and write an empty column.
                cnan += 1
                correlation = float('nan')
                correlation_str = ''
            numna = tokens[3]
            bonf = tokens[5]
            numnaf1 = tokens[7]
            numnaf2 = tokens[9]
            try:
                pv = str(pvlambda(float(tokens[4])))
                pv_bonf = str(pvlambda(float(tokens[6])))
                pvf1 = str(pvlambda(float(tokens[8])))
                pvf2 = str(pvlambda(float(tokens[10])))
            except ValueError:
                # error in pairwise script, ignore these associations for now
                continue

            max_pv = max(max_pv, float(pv))
            max_pv_corr = max(max_pv_corr, float(pv_bonf))

            rho = str(db_util.sign(correlation) * abs(float(pv)))

            link_distance = 500000000
            if len(tokens) >= 12:
                link_distance = int(tokens[11])
            elif (len(dataA) >= 5 and len(dataB) >= 5
                    and db_util.is_numeric(dataA[4]) >= 1
                    and db_util.is_numeric(dataB[4]) >= 1
                    and dataA[3] == dataB[3]):
                # Fields 3 match and fields 4 are numeric: use their absolute difference.
                link_distance = abs(int(dataB[4]) - int(dataA[4]))

            columns = [
                feature1id, feature2id,
                nodeA, "\t".join(dataA),
                nodeB, "\t".join(dataB),
                correlation_str, numna, pv, bonf, pv_bonf,
                numnaf1, pvf1, numnaf2, pvf2, rho,
                str(link_distance), str(f1genescore), str(f2genescore),
            ]
            edges_out_re.write("\t".join(columns) + "\n")
            if do_pubcrawl == "yes":
                getPairwiseInfo.processLine(line, edges_out_pc)
                pcc += 1

    print(
        "Report: Valid Edges %i Duped %i cNAN %i \nunMapped %i Saved to %s \nTotal %i max_pvalue %f max_pvalue_corr %f"
        % (validEdgeId - 1, dupeEdges, cnan, unMapped, unmappedPath,
           totalEdges, max_pv, max_pv_corr))
    with open(meta_path, 'w') as edges_meta_json:
        edges_meta_json.write('{"max_logpv":%f}' % (max_pv))

    _write_mysql_load_script(efsh_path, myhost, myport, myuser, mypw, mydb,
                             edge_table, edges_re_path)
    print("Begin pairwise db bulk upload " + time.ctime())
    os.system("sh " + efsh_path)

    # create sharded association files for solr import
    _write_solr_load_script(solrsh_path, mysolr, edges_re_path, dataset_label)
    print("Begin pairwise solr upload " + time.ctime())
    os.system("sh " + solrsh_path)

    if do_pubcrawl == "yes":
        print("senting Pubcrawl notification to " + contacts)
        smtp.main(
            "*****@*****.**", contacts,
            "Notification - New Pairwise Associations for PubCrawl",
            "New pairwise associations ready for PubCrawl load\n" +
            edges_pc_path + "\n\n" + str(pcc) + " Total Edges\n\n" +
            edges_re_path +
            " loaded into RegulomeExplorer, dataset label is " +
            dataset_label + " \n\n")


def _write_mysql_load_script(script_path, host, port, user, password, schema,
                             table, tsv_path):
    """Write the bash script that bulk-loads tsv_path into table through the mysql client."""
    # NOTE(review): the password ends up in plain text in the script and on the
    # mysql command line; kept as-is because callers rely on the generated file.
    with open(script_path, 'w') as script:
        script.write("#!/bin/bash\n")
        script.write(
            "mysql -h %s --port %s --user=%s --password=%s --database=%s<<EOFMYSQL\n"
            % (host, port, user, password, schema))
        script.write("load data local infile '" + tsv_path +
                     "' replace INTO TABLE " + table +
                     " fields terminated by '\\t' LINES TERMINATED BY '\\n';\n")
        script.write("\ncommit;")
        script.write("\nEOFMYSQL")


def _write_solr_load_script(script_path, solr_url, tsv_path, dataset_label,
                            core_count=8):
    """Write the bash script that shards tsv_path and reloads the dataset into each Solr core."""
    with open(script_path, 'w') as script:
        script.write("#!/bin/bash\n")
        script.write("python createPWShardedDataset.py " + tsv_path + " " +
                     dataset_label + "\n")
        # First delete the dataset's existing documents from every core ...
        for core in range(core_count):
            script.write(
                "curl '" + solr_url + "/core" + str(core) +
                "/update/?commit=true' -H 'Content-type:text/xml' --data-binary '<delete><query>dataset:\""
                + dataset_label + "\"</query></delete>'\n")
        # ... then post each core's shard file in the background.
        for core in range(core_count):
            script.write(
                "curl '" + solr_url + "/core" + str(core) +
                "/update/csv?commit=true&separator=%09&overwrite=false&escape=\\ ' --data-binary @"
                + tsv_path + "_core" + str(core) +
                "_final.tsv -H 'Content-type:text/plain;charset=utf-8' &\n")

Example #6

Show file

if __name__ == '__main__':
    # fgnet train 82 group
    start_time = smtp.print_time("全部开始训练!!!")
    fgnet_root = cfg.dataset.fgnet_leave1out
    best_val_mae_arr = []
    for i in range(1, 83):
        tmp = str(i) if i > 9 else "0" + str(i)
        data_dir = Path(fgnet_root).joinpath(tmp)
        best_val_mae_arr.append(main(str(data_dir)))
    print(
        f"fgnet all train finished and best_val_mae_arr is:{best_val_mae_arr}")
    end_time = smtp.print_time("全部训练结束!!!")
    print(smtp.date_gap(start_time, end_time))
    smtp.main(
        dict_={
            "fgnet全部训练耗时: ": smtp.date_gap(start_time, end_time),
            "best_val_mae_arr": best_val_mae_arr
        })

    time.sleep(600)  # sleep 10 min

    # fgnet_align train 82 group
    start_time = smtp.print_time("全部开始训练!!!")
    fgnet_align_root = cfg.dataset.fgnet_align_leave1out
    best_val_mae_arr = []
    for i in range(1, 83):
        tmp = str(i) if i > 9 else "0" + str(i)
        data_dir = Path(fgnet_align_root).joinpath(tmp)
        best_val_mae_arr.append(main(str(data_dir)))
    print(
        f"fgnet_align all train finished and best_val_mae_arr is:{best_val_mae_arr}"

Example #7

Show file

def _load_tsv_index(path, key_column):
	"""Read a tab-delimited file into a dict keyed by one of its columns.

	Every line is stripped and split on tabs; the complete token list is
	stored under ``tokens[key_column]``. A later line replaces an earlier one
	that has the same key.
	"""
	index = {}
	with open(path, 'r') as handle:
		for raw in handle:
			tokens = raw.strip().split("\t")
			index[tokens[key_column]] = tokens
	return index


def _normalize_feature_fields(alias):
	"""Split a ':'-delimited feature alias and normalise its fields.

	Returns ``(alias, fields)``:
	- with more than four fields, the first three characters of the fourth
	  field are dropped (presumably a "chr" prefix -- confirm with the data);
	- a CLIN or SAMP alias with at most seven fields is rebuilt from its first
	  three fields followed by five empty ones, and that rebuilt alias is
	  returned;
	- any other seven-field alias gets an empty eighth field appended.
	"""
	fields = alias.split(':')
	if len(fields) > 4:
		fields[3] = fields[3][3:]
	if len(fields) <= 7 and fields[1] in ('CLIN', 'SAMP'):
		alias = ":".join(fields[0:3]) + ":::::"
		fields = alias.split(':')
	elif len(fields) == 7:
		fields.append("")
	return alias, fields


def _resolve_feature_id(alias, fields, features_hash, aliasid_hash):
	"""Look up the id of a feature, falling back to the alias-id table.

	Returns ``(feature_id, omic_alias, fields)``. When the alias is found in
	``features_hash`` the alias and fields are returned unchanged. Otherwise
	the row from ``aliasid_hash`` supplies the id (column 1) and a replacement
	alias (column 2), whose fields are re-split with the first three
	characters of the fourth field dropped.

	Raises KeyError when the alias is in neither table.
	"""
	try:
		return features_hash[alias][0], alias, fields
	except KeyError:
		alias_row = aliasid_hash[alias]
		feature_id = alias_row[1]
		omic_alias = alias_row[2]
		omic_fields = omic_alias.split(':')
		omic_fields[3] = omic_fields[3][3:]
		return feature_id, omic_alias, omic_fields


def _format_association_row(alias_a, alias_b, pvalue, importance, correlation, patientct, id_a, fields_a, id_b, fields_b, score_a, score_b, rhoscore, link_distance):
	"""Build one tab-delimited, newline-terminated output row for an edge."""
	return "\t".join([
		alias_a, alias_b, pvalue, importance, correlation, patientct,
		id_a, "\t".join(fields_a), id_b, "\t".join(fields_b),
		str(score_a), str(score_b), rhoscore, str(link_distance),
	]) + "\n"


def process_associations_rfex(dataset_label, matrixfile, associationsfile, config, annotations, collapse_direction, reverse_direction, results_path, pv_lambda, do_pubcrawl, contacts, keep_unmapped, featureInterestingFile):
	"""Convert an associations file into edge rows and bulk-load them.

	Reads the tab-delimited ``associationsfile``, resolves both features of
	every row against the annotation, feature and alias-id tables, writes the
	resulting edges to ``edges_out_<dataset_label>_rface_re.tsv``, then
	generates and runs (via ``os.system``) a MySQL load script and a Solr
	upload script. All output paths are built by plain concatenation onto
	``results_path``.

	Parameters:
	dataset_label -- used in the table name and in every generated file name.
	matrixfile -- accepted for interface compatibility; not used here.
	associationsfile -- input rows; columns 0-5 are read as target feature,
		predictor feature, p-value, importance, correlation, patient count.
	config -- passed to the db_util getters for DB and Solr settings.
	annotations -- passed to parse_features_rfex.process_feature_annotations.
	collapse_direction -- 0 stores every A->B row; any other value keeps only
		one row per unordered pair, the one with the higher importance.
	reverse_direction -- 1 additionally stores the reversed (B->A) row.
	results_path -- prefix for all generated files and the two lookup files.
	pv_lambda -- callable applied to the float p-value before it is written.
	do_pubcrawl -- "yes" also writes the PubCrawl file and calls smtp.main
		(presumably an e-mail notification -- confirm in the smtp module).
	contacts -- passed to smtp.main as the recipient argument.
	keep_unmapped -- 0 diverts associations flagged by
		db_util.isUnmappedAssociation to the unmapped file.
	featureInterestingFile -- passed to
		parse_features_rfex.get_feature_interest_hash.

	Calls sys.exit(-1) when ``associationsfile`` does not exist.
	"""
	mydb = db_util.getDBSchema(config)
	myuser = db_util.getDBUser(config)
	mypw = db_util.getDBPassword(config)
	myhost = db_util.getDBHost(config)
	myport = db_util.getDBPort(config)
	mysolr = db_util.getSolrPath(config)
	if not os.path.isfile(associationsfile):
		print(associationsfile + " does not exist; unrecoverable ERROR")
		sys.exit(-1)
	associations_table = mydb + ".mv_" + dataset_label + "_feature_networks"
	print("Begin processing associations %s Applying processing_pubcrawl %s" % (time.ctime(), do_pubcrawl))

	# Load every lookup table and the input rows before creating any output.
	fIntHash = parse_features_rfex.get_feature_interest_hash(featureInterestingFile)
	annotation_hash, ftype = parse_features_rfex.process_feature_annotations(annotations)
	features_hash = _load_tsv_index(results_path + dataset_label + '_features_out.tsv', 1)
	aliasid_hash = _load_tsv_index(results_path + dataset_label + '_features_alias_id.tsv', 0)
	with open(associationsfile, 'r') as associations_in:
		lines = associations_in.readlines()

	unmappedPath = results_path + 'edges_out_' + dataset_label + '_rface_unmapped.tsv'
	fshout = open(results_path + 'load_sql_associations_' + dataset_label + '.sh', 'w')
	solrshout = open(results_path + 'load_solr_assocations_' + dataset_label + '.sh', 'w')
	unmappedout = open(unmappedPath, 'w')
	# The edge file is opened exactly once; the generated scripts refer to it
	# through tsvout.name.
	tsvout = open(results_path + 'edges_out_' + dataset_label + '_rface_re.tsv', 'w')
	pubcrawl_tsvout = open(results_path + 'edges_out_' + dataset_label + '_rface_pc.tsv', 'w')

	pcc = 0
	unMapped = 0
	# These two counters are only reported; nothing in this function filters
	# on p-value or importance, so they stay at zero.
	pvalueCutCount = 0
	impCut = 0
	rhoscore = ""
	associations_dic = {}
	for lc, line in enumerate(lines, 1):
		columns = line.strip().split('\t')
		# Six columns are required because columns[5] is read below.
		if len(columns) < 6:
			print("Missing required tokens in associations lineIndex %i lineValue %s" % (lc, line))
			continue
		# afm ids are the raw column values; they key the association dict.
		f1afm_id = columns[0]
		f2afm_id = columns[1]

		# Aliases with fewer than three ':' fields are translated through the
		# annotation table.
		f1alias = columns[0]
		if len(f1alias.split(":")) < 3:
			annotated_feature = annotation_hash.get(f1alias)
			if annotated_feature is None:
				print("ERROR: Target feature %s is not in afm/annotation %i" % (f1alias, len(annotation_hash)))
				continue
			f1alias = annotated_feature.replace("\t", ":")
		f2alias = columns[1]
		if len(f2alias.split(":")) < 3:
			annotated_feature = annotation_hash.get(f2alias)
			if annotated_feature is None:
				print("ERROR: Predictor feature %s is not in afm/annotation" % (f2alias))
				continue
			f2alias = annotated_feature.replace("\t", ":")

		# Interest scores are looked up with the alias as it is before the
		# CLIN/SAMP rewrite below.
		try:
			f1genescore = fIntHash[f1alias]
		except KeyError:
			f1genescore = 0
		try:
			f2genescore = fIntHash[f2alias]
		except KeyError:
			f2genescore = 0

		f1alias, f1data = _normalize_feature_fields(f1alias)
		f2alias, f2data = _normalize_feature_fields(f2alias)

		try:
			f1id, f1aliasOmic, f1data = _resolve_feature_id(f1alias, f1data, features_hash, aliasid_hash)
		except KeyError:
			print("Skipping Key error with alias1 " + f1alias)
			continue
		try:
			f2id, f2aliasOmic, f2data = _resolve_feature_id(f2alias, f2data, features_hash, aliasid_hash)
		except KeyError:
			print("Skipping Key error with alias2 " + f2alias)
			continue

		pvalue = str(pv_lambda(float(columns[2])))
		importance = columns[3]
		correlation = columns[4]
		patientct = columns[5]
		if db_util.isUnmappedAssociation(f1alias, f2alias) and keep_unmapped == 0:
			unmappedout.write(f1alias + "\t" + f2alias + "\n")
			unMapped += 1
			continue

		# The distance is only computed when both features have a numeric
		# fifth field and an identical fourth field; otherwise it stays -1.
		link_distance = -1
		if (len(f1data) >= 5 and len(f2data) >= 5 and db_util.is_numeric(f1data[4]) >= 1 and db_util.is_numeric(f2data[4]) >= 1 and f1data[3] == f2data[3]):
			link_distance = abs(int(f2data[4]) - int(f1data[4]))

		forward_key = f1afm_id + "_" + f2afm_id
		reverse_key = f2afm_id + "_" + f1afm_id
		forward_row = _format_association_row(f1aliasOmic, f2aliasOmic, pvalue, importance, correlation, patientct, f1id, f1data, f2id, f2data, f1genescore, f2genescore, rhoscore, link_distance)
		if collapse_direction == 0:
			associations_dic[forward_key] = forward_row
		else:
			# Keep one row per unordered pair: find whichever direction is
			# already stored and replace it only if this row is more important.
			if forward_key in associations_dic:
				ekey = forward_key
			elif reverse_key in associations_dic:
				ekey = reverse_key
			else:
				ekey = None
			if ekey is None:
				associations_dic[forward_key] = forward_row
			else:
				prevImportance = associations_dic[ekey].split("\t")[3]
				if float(importance) > float(prevImportance):
					associations_dic[ekey] = forward_row
		if reverse_direction == 1:
			# NOTE(review): this overwrites any entry stored under the reverse
			# key, including one kept by the collapse logic above.
			associations_dic[reverse_key] = _format_association_row(f2aliasOmic, f1aliasOmic, pvalue, importance, correlation, patientct, f2id, f2data, f1id, f1data, f2genescore, f1genescore, rhoscore, link_distance)
		if do_pubcrawl == "yes":
			getRFACEInfo.processLine(line, pubcrawl_tsvout)
			pcc += 1

	tsvout.writelines(associations_dic.values())

	# NOTE(review): the database password is written in clear text into the
	# generated shell script.
	fshout.write("#!/bin/bash\n")
	fshout.write("mysql -h %s --port %s --user=%s --password=%s --database=%s<<EOFMYSQL\n" % (myhost, myport, myuser, mypw, mydb))
	fshout.write("load data local infile '" + tsvout.name + "' replace INTO TABLE " + associations_table + " fields terminated by '\\t' LINES TERMINATED BY '\\n';")
	fshout.write("\nEOFMYSQL\n")
	# Everything must be flushed to disk before the load script runs.
	tsvout.close()
	unmappedout.close()
	pubcrawl_tsvout.close()
	fshout.close()
	print("\nReport: ValidEdges %i ImportanceCutoff %i edges filtered %i \nunMapped Edges %i Saved to %s" % (len(associations_dic), impCut, pvalueCutCount, unMapped, unmappedPath))
	print("Begin RF-ACE db bulk upload %s os.system sh %s" % (time.ctime(), fshout.name))
	os.system("sh " + fshout.name)

	# The Solr script first deletes this dataset from cores 0-7, then posts
	# one per-core TSV file to each core as a background curl job.
	solr_core_count = 8
	solrshout.write("#!/bin/bash\n")
	solrshout.write("python createRFShardedDataset.py " + tsvout.name + " " + dataset_label + "\n")
	for core in range(solr_core_count):
		solrshout.write("curl '" + mysolr + "/core" + str(core) + "/update/?commit=true' -H 'Content-type:text/xml' --data-binary '<delete><query>dataset:\"" + dataset_label + "\"</query></delete>'\n")
	for core in range(solr_core_count):
		# "\\ " emits a backslash followed by a space, the same text the
		# hand-written lines produced for the escape= parameter.
		solrshout.write("curl '" + mysolr + "/core" + str(core) + "/update/csv?commit=true&separator=%09&overwrite=false&escape=\\ ' --data-binary @" + tsvout.name + "_core" + str(core) + "_final.tsv -H 'Content-type:text/plain;charset=utf-8' &\n")
	solrshout.close()
	print("Begin rface solr upload " + time.ctime())
	os.system("sh " + solrshout.name)

	if do_pubcrawl == 'yes':
		smtp.main("*****@*****.**", contacts, "Notification - New RFAce " + dataset_label + " Associations for PubCrawl", "New RFAce associations ready for PubCrawl load\n" + pubcrawl_tsvout.name + "\n" + str(pcc) + " Total Edges\n" + tsvout.name + " loaded into RegulomeExplorer, dataset label is " + dataset_label + "\n\n")
	print("Done processing associations %s" % (time.ctime()))
	associations_dic = None