def send_email():
    sender = Configuration().GetData()['EmailID']
    gmail_password = Configuration().GetData()['Password']
    COMMASPACE = ', '
    recipients = ['*****@*****.**']
    # Create the enclosing (outer) message
    outer = MIMEMultipart()
    outer['Subject'] = 'DataFeed @ ' + str(datetime.datetime.now().date())
    outer['To'] = COMMASPACE.join(recipients)
    outer['From'] = sender
    outer.preamble = 'You will not see this in a MIME-aware mail reader.\n'
    msg = MIMEText('Data Feeding Start in Mongodb ' + str(datetime.datetime.now()))
    outer.attach(msg)
    composed = outer.as_string()
    # Send the email; the with-block closes the SMTP connection automatically
    try:
        with smtplib.SMTP('smtp.gmail.com', 587) as s:
            s.ehlo()
            s.starttls()
            s.ehlo()
            s.login(sender, gmail_password)
            s.sendmail(sender, recipients, composed)
        print("Email sent!")
    except Exception:
        print("Unable to send the email. Error: ", sys.exc_info()[0])
        raise
def login(self, request):
    try:
        data = json.loads(request.data.decode())
        self.ip = Configuration().GetData()['PrivateIp']
        self.port = Configuration().GetData()['MongoPort']
        self.db = Configuration().GetData()['MongoDB']
        obj = MongoDB()
        obj.ConnectMongo(self.ip, self.port, self.db)
        record = obj.ReadValue("users", data["email"])
        if record is not None:
            record = ast.literal_eval(record['Data'])
            if record['password'] == data["password"]:
                ret = {
                    'access_token': create_access_token(identity=data["email"]),
                    'refresh_token': create_refresh_token(identity=data["email"]),
                    'status': "True"
                }
                return jsonify(ret), 200
        return jsonify({"status": "Invalid username or password"}), 401
    except Exception as e:
        generate_log('auth', str(e), str(request))
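# Hedged usage sketch, not part of the original source: the login() handler above
# expects a JSON body with "email" and "password" keys. The route wiring is an
# illustrative assumption.
def _example_login_request():  # hypothetical helper
    payload = json.dumps({"email": "user@example.com", "password": "secret"})
    # POSTing this body to the route bound to login() should return HTTP 200 with
    # {'access_token': ..., 'refresh_token': ..., 'status': 'True'} on success,
    # or HTTP 401 with {'status': 'Invalid username or password'} otherwise.
    return payload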
def get_user(self):
    try:
        email = get_jwt_identity()
        self.ip = Configuration().GetData()['PrivateIp']
        self.port = Configuration().GetData()['MongoPort']
        self.db = Configuration().GetData()['MongoDB']
        obj = MongoDB()
        obj.ConnectMongo(self.ip, self.port, self.db)
        record = obj.ReadValue("users", email)
        record = ast.literal_eval(record['Data'])
        record.pop('password', None)  # never return the stored password
        return jsonify(record)
    except Exception as e:
        generate_log('get_user', str(e), 'get_user method')
def email_logfile():
    sender = Configuration().GetData()['EmailID']
    gmail_password = Configuration().GetData()['Password']
    dr_tariq_team = Configuration().GetData()['team_emails']
    COMMASPACE = ', '
    recipients = dr_tariq_team
    # Create the enclosing (outer) message
    outer = MIMEMultipart()
    outer['Subject'] = 'dpdmlog @ ' + str(datetime.datetime.now().date())
    outer['To'] = COMMASPACE.join(recipients)
    outer['From'] = sender
    outer.preamble = 'You will not see this in a MIME-aware mail reader.\n'
    # List of attachments
    attachments = ['sp_error.log']
    # Add the attachments to the message
    for file in attachments:
        try:
            with open(file, 'rb') as fp:
                msg = MIMEBase('application', "octet-stream")
                msg.set_payload(fp.read())
            encoders.encode_base64(msg)
            msg.add_header('Content-Disposition', 'attachment',
                           filename=os.path.basename(file))
            outer.attach(msg)
        except Exception:
            print("Unable to open one of the attachments. Error: ", sys.exc_info()[0])
            raise
    composed = outer.as_string()
    # Send the email; the with-block closes the SMTP connection automatically
    try:
        with smtplib.SMTP('smtp.gmail.com', 587) as s:
            s.ehlo()
            s.starttls()
            s.ehlo()
            s.login(sender, gmail_password)
            s.sendmail(sender, recipients, composed)
        # print("Email sent!")
    except Exception:
        print("Unable to send the email. Error: ", sys.exc_info()[0])
        raise
def setUp(self, is_unit_test):
    self.ExceptionHandler = CustomException.CustomException.getInstance()
    self.Configuration = Configuration()
    self.Validation = Validation
    self.Setup = Setup.Setup()
    self.Setup.setupApp()
    self.logger = Debugger.Debugger.getInstance()
    self.Database = Database.Database.getInstance(is_unit_test)
    self.Database.connect(is_unit_test)
    self.ProjectGUI = ProjectGUI
    self.Setup.createTables()
    self.ProjectRepo = ProjectRepository.ProjectRepository()
    email_configuration = self.Database.getConfiguration()
    try:
        self.Configuration.setEmailConfiguration(email_configuration[0])
    except Exception:
        # getConfiguration() may return a single row instead of a list of rows
        self.Configuration.setEmailConfiguration(email_configuration)
    self.ProjectsList = {}
    self.queue = {}
    self.loadAllProjects()
def getdata(self):
    app = TestApp("0.0.0.0", 4001, 10)
    for com in Configuration().GetData()['CompanyList']:
        ibcontract = IBcontract()
        ibcontract.secType = "STK"
        ibcontract.lastTradeDateOrContractMonth = "201809"
        ibcontract.symbol = com
        ibcontract.exchange = "SMART"
        resolved_ibcontract = app.resolve_ib_contract(ibcontract)
        # Hard-coded sample bars used while the historical-data call is disabled
        dataset1 = {
            0: ['20190502 13:30:00', '20190502 16:00:00'],
            1: [209.95, 208.65],
            2: [212.65, 210.29],
            3: [208.13, 208.41],
            4: [208.63, 209.17],
            5: [149612, 100915]
        }
        durationstr = "3600 S"
        # historic_data = app.get_IB_historical_data(resolved_ibcontract, durationstr, bar)
        df = pd.DataFrame(dataset1)
        df.rename(columns={0: "date", 1: "open", 2: "high", 3: "low",
                           4: "close", 5: "volume"}, inplace=True)
        for bar in barSize:
            dataset = self.strategy(df)
            print(com)
            print(bar)
            # MongoStore().Feed_IntraDay(com, bar, dataset)
            print(df)
def data_feed():
    app = TestApp("0.0.0.0", 4001, 9)
    for com in Configuration().GetData()['CompanyList']:
        ibcontract = IBcontract()
        ibcontract.secType = "STK"
        ibcontract.lastTradeDateOrContractMonth = "201809"
        ibcontract.symbol = com
        ibcontract.exchange = "SMART"
        resolved_ibcontract = app.resolve_ib_contract(ibcontract)
        durationstr = "1 D"
        for bar in barSize:
            historic_data = app.get_IB_historical_data(resolved_ibcontract, durationstr, bar)
            dataset = pd.DataFrame(historic_data)
            dataset['signal'] = 1
            for index, row in dataset.iterrows():
                # 1 when the bar closed below its open, 0 otherwise
                signal = 1 if dataset['open'][index] > dataset['close'][index] else 0
                print(signal)
                dataset.loc[index, 'signal'] = signal
            print(com)
            print(bar)
            Feed_IntraDay(com, bar, dataset)
            print(dataset)
def loadAccesibility(pdbId, chainType="l", rasaThr=10.0):
    '''
    Loads PSAIA files for a given pdbId and returns a set of accessible resIds and a set of
    non-accessible resIds.
    :param pdbId: str. The identifier for the pdb file
    :param chainType: str. "l" for ligand and "r" for receptor
    :param rasaThr: float. A threshold of relative ASA to decide whether or not a residue is accessible
    :return (accesibleSet, nonAccesibleSet)
             accesibleSet: set(str[]). Set of resIds of residues that are accessible according to PSAIA and the threshold
             nonAccesibleSet: set(str[]). Set of resIds of residues that are non-accessible according to PSAIA and the threshold
    '''
    PSAIA_PATH = os.path.join(Configuration().computedFeatsRootDir, "structStep/PSAIA/procPSAIA")
    accesibleSet = set([])
    nonAccesibleSet = set([])
    for fname in os.listdir(PSAIA_PATH):
        if fname.startswith(pdbId + "_" + chainType) and fname.endswith(".psaia.tab"):
            with open(os.path.join(PSAIA_PATH, fname)) as f_:
                f_.readline()  # skip header line
                for line in f_:
                    lineArray = line.split()
                    chainId = lineArray[0]
                    resId = lineArray[1]
                    res_full_id = chainId + "_" + resId
                    if float(lineArray[8]) > rasaThr:
                        accesibleSet.add(res_full_id)
                    else:
                        nonAccesibleSet.add(res_full_id)
    return accesibleSet, nonAccesibleSet
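# Hedged usage sketch, not part of the original source: the pdb id and threshold
# below are illustrative. Partitions the ligand residues of a complex into
# solvent-accessible and buried sets according to PSAIA relative ASA.
def _example_loadAccesibility():  # hypothetical helper
    accessibleSet, buriedSet = loadAccesibility("1A2K", chainType="l", rasaThr=10.0)
    print(len(accessibleSet), "accessible residues;", len(buriedSet), "buried residues")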
def __init__(self): self.name = "user" self.ip = Configuration().GetData()['PrivateIp'] self.port = Configuration().GetData()['MongoPort'] self.db = Configuration().GetData()['MongoDB'] self.email = Configuration().GetData()['EmailID'] self.password = Configuration().GetData()['Password'] self.reg_emailLink = Configuration().GetData()['RegisterEmail'] self.forgot_passwordLink = Configuration().GetData()['ForgotPassword'] self.email_ids = Configuration().GetData()['Email'] self.sp_link = Configuration().GetData()['SP'] self.mongoObj = MongoDB() self.mongoObj.ConnectMongo(self.ip, self.port, self.db)
def computeFeaturesAllComplexes(OneFeaturesComputerClass, pdbsIndir, computedFeatsRootDir,
                                classArgs={}, ncpu=1):
    '''
    Computes one type of feature over all complexes that are inside pdbsIndir.
    @param OneFeaturesComputerClass: FeaturesComputer. Class used to compute one kind of feature
    @param pdbsIndir: str. Path where the pdb files to be computed are located. There must be 2 pdb
                      files per complex; the infixes _l_ and _r_ distinguish them, e.g. "1A2K_l_u.pdb"
                      and "1A2K_r_u.pdb". Pdb files ending with "b.pdb" will be skipped.
    @param computedFeatsRootDir: str. Path where features will be stored
    @param classArgs: Dict. The arguments that will be passed to OneFeaturesComputerClass()
    @param ncpu: int. The number of subprocesses to use in parallel (parallelism at complex level)
    '''
    ConfigObject = Configuration()
    if pdbsIndir is None:
        pdbsIndir = ConfigObject.pdbsIndir
    if computedFeatsRootDir is None:
        computedFeatsRootDir = ConfigObject.computedFeatsRootDir
    pdbsIndir = os.path.expanduser(pdbsIndir)
    computedFeatsRootDir = os.path.expanduser(computedFeatsRootDir)
    fnames = {}
    for fname in sorted(os.listdir(pdbsIndir)):
        if not fname.endswith(".pdb"):
            continue  # skip non-pdb files
        if not fname.endswith("b.pdb"):
            prefix = fname.split("_")[0]
            if "_r_" in fname or "_l_" in fname:
                if prefix not in fnames:
                    fnames[prefix] = [None, None]
                if "_r_" in fname:
                    fnames[prefix][0] = os.path.join(pdbsIndir, fname)
                if "_l_" in fname:
                    fnames[prefix][1] = os.path.join(pdbsIndir, fname)
            else:
                fnames[prefix].append(os.path.join(pdbsIndir, fname))
    if len(fnames) < 1:
        raise ValueError("There are no files to be processed")
    for prefix in fnames:  # check for errors
        if len(fnames[prefix]) > 2 or sum([1 for elem in fnames[prefix] if elem is None]):
            raise ValueError("There must be just 2 pdb files for each complex to be predicted")
        else:
            fnames[prefix] = tuple(fnames[prefix])
    Parallel(n_jobs=ncpu, backend="multiprocessing", batch_size=2)(
        delayed(computeFunction)(OneFeaturesComputerClass, fnames[prefix][0], fnames[prefix][1],
                                 computedFeatsRootDir, classArgs)
        for prefix in sorted(fnames))
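# Hedged usage sketch, not part of the original source: DsspComputer is one of the
# FeaturesComputer subclasses defined elsewhere in this repository; the paths and
# ncpu value are illustrative assumptions.
def _example_computeFeaturesAllComplexes():  # hypothetical helper
    computeFeaturesAllComplexes(DsspComputer, pdbsIndir="~/pdbs",
                                computedFeatsRootDir="~/computedFeatures", ncpu=4)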
def get_history(self):
    try:
        email = get_jwt_identity()
        self.ip = Configuration().GetData()['PrivateIp']
        self.port = Configuration().GetData()['MongoPort']
        self.db = Configuration().GetData()['MongoDB']
        obj = MongoDB()
        obj.ConnectMongo(self.ip, self.port, self.db)
        record = obj.ReadValue("history", email)
        if record is not None:
            record = ast.literal_eval(record["Data"])
            toreturn = {"status": "True", "record": record}
        else:
            toreturn = {"status": "False"}
        return jsonify(toreturn)
    except Exception as e:
        generate_log('get_history', str(e))
def checkIfAllCodified(self, inputPdbs=None):
    if inputPdbs is None:  # Default parameters
        conf = Configuration()
        inputPdbs = conf.pdbsIndir
    check0 = self.checkIfAllCmapsComputed(inputPdbs)
    check1 = self.checkIfAllCodifiedWorker(self.trainingDataPath)
    check2 = self.checkIfAllCodifiedWorker(self.testingDataPath)
    return check0 and check1 and check2
def set_default_params(self):
    cfg = Configuration()
    self.learning_rate_value.setText(str(cfg.learning_rate))
    self.learning_steps_value.setText("200 400")
    self.decay_value.setText(str(cfg.decay))
    self.momentum_value.setText(str(cfg.momentum))
    self.epoch_value.setText(str(cfg.number_of_epochs))
    self.iterations_value.setText(str(cfg.number_of_iterations))
    self.path_content.setText('Dataset')
    self.dir_path = 'Dataset'
    self.class_value.setText(str(cfg.number_of_classes))
    self.seed_value.setText(str(cfg.seed))
    self.warm_up_value.setText(str(cfg.iterations_to_warmup))
def data_feed():
    app = TestApp("0.0.0.0", 4001, 10)
    for com in Configuration().GetData()['CompanyList']:
        ibcontract = IBcontract()
        ibcontract.secType = "STK"
        ibcontract.lastTradeDateOrContractMonth = "201809"
        ibcontract.symbol = com
        ibcontract.exchange = "SMART"
        resolved_ibcontract = app.resolve_ib_contract(ibcontract)
        bar = '1 sec'
        durationstr = "120 sec"
        historic_data = app.get_IB_historical_data(resolved_ibcontract, durationstr, bar)
        print(com)
        print(bar)
        Feed_IntraDay(com, bar, historic_data)
        print(historic_data)
def __init__(self, dataRootPath=None, featuresToInclude=None):
    if dataRootPath is None:
        dataRootPath = Configuration().computedFeatsRootDir
    if featuresToInclude is None:
        featuresToInclude = FEATURES_TO_INCLUDE
    else:
        # list(zip(...)) keeps this working in both Python 2 and Python 3
        if "psaia" not in list(zip(*featuresToInclude))[0]:
            featuresToInclude.insert(0, ("psaia",
                                         ("structStep/PSAIA/procPSAIA", [8], {"total_RASA": 8})))
    self.psaiaIndex = list(zip(*featuresToInclude))[0].index("psaia")
    DataLoader.__init__(self, dataRootPath, featuresToInclude)
    self.rasaLDict = {}
    self.rasaRDict = {}
def moveAndWriteAsPDBIfMmcif(fnameIn, fnameOut, removeInput=False):
    from Config import Configuration
    conf = Configuration()
    minNumResidues, maxNumResidues = conf.minNumResiduesPartner, conf.maxNumResiduesPartner
    try:
        struct, __ = loadPdbIfIsPath(fnameIn)
        totalNumRes = 0
        for chain in struct[0]:
            nResInChain = len(chain.get_list())
            totalNumRes += nResInChain
        if not (minNumResidues < totalNumRes < maxNumResidues):
            raise BadNumberOfResidues(totalNumRes)
        else:
            writter = PDBIO()
            writter.set_structure(struct)
            writter.save(fnameOut)
        if removeInput:
            os.remove(fnameIn)
        return True
    except Exception as e:
        print("Error in moveAndWriteAsPDBIfMmcif !!!", e)
        return False
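# Hedged usage sketch, not part of the original source (illustrative paths):
# converts a structure file to pdb if its residue count is within the configured
# bounds, keeping the input file.
def _example_moveAndWriteAsPDBIfMmcif():  # hypothetical helper
    ok = moveAndWriteAsPDBIfMmcif("in/1abc.cif", "out/1abc.pdb", removeInput=False)
    print("converted" if ok else "skipped (bad residue count or load error)")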
def excutePatchDock(lPdbFname, rPdbFname, lBindingSite, rBindingSite, patchDockWdir,
                    writeOnlyLigand=False, cleanWorkingDir=False):
    conf = Configuration()
    patchDockRootDir = conf.patchDockRootDir
    lPdbFname = uncompressPdbIfGz(lPdbFname)
    rPdbFname = uncompressPdbIfGz(rPdbFname)
    configStr = patchDockTemplate % {"lPdbFname": lPdbFname, "rPdbFname": rPdbFname,
                                     "patchDockRootDir": patchDockRootDir,
                                     "patchDockWdir": patchDockWdir}
    configFname = "%(patchDockWdir)s/config.txt" % {"patchDockWdir": patchDockWdir}
    resultsFname = "%(patchDockWdir)s/results.patchdock" % {"patchDockWdir": patchDockWdir}
    myMakeDir(patchDockWdir)
    with open(configFname, "w") as f:
        f.write(configStr)
    writeBindingSite(lBindingSite, patchDockWdir, chainType="l")
    writeBindingSite(rBindingSite, patchDockWdir, chainType="r")
    cmd = [os.path.join(patchDockRootDir, "patch_dock.Linux"), configFname, resultsFname]
    proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=patchDockWdir)
    print(" ".join(cmd))
    output = proc.communicate()
    print(output[0])
    print(output[1])
    if "error" in output[1]:
        raise Exception("Error executing patchDock")
    # Parse the top-scored rigid-body transformations from the PatchDock results table
    listOfSelectedModels = []
    with open(resultsFname) as f:
        for line in f:
            if "# | score | pen." in line:
                break
        for i, line in enumerate(f):
            if i >= N_MODELS_TO_EXTRACT:
                break
            lineArray = line.split("|")
            if len(lineArray) > 0:
                score = float(lineArray[1].strip())
                transformations = lineArray[-1].split()
                rots = [float(elem) for elem in transformations[:3]]
                trans = [float(elem) for elem in transformations[3:]]
                listOfSelectedModels.append((i, score, rots, trans))
    # Apply each transformation to the ligand pdb and optionally prepend the receptor
    for i, score, rots, trans in listOfSelectedModels:
        rotX, rotY, rotZ = rots
        transX, transY, transZ = trans
        fnameOut = os.path.join(patchDockWdir, "results.patchdock.%d.pdb" % (i + 1))
        rotateTranslatePdb(lPdbFname, rotX, rotY, rotZ, transX, transY, transZ,
                           fnameOut=fnameOut)
        if not writeOnlyLigand:
            cmd = ("cat %(rPdbFname)s %(fnameOut)s > %(fnameOut)s.tmp && "
                   "mv %(fnameOut)s.tmp %(fnameOut)s") % locals()
            proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True,
                         cwd=patchDockWdir)
            output = proc.communicate()
            print(output[0])
            print(output[1])
            if "error" in output[1]:
                raise Exception("Error concatenating ligand and receptor pdb files")
    if cleanWorkingDir:
        cleanDirectory(lPdbFname, rPdbFname, patchDockWdir)
    print("patchDock DONE")
    return listOfSelectedModels
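# Hedged usage sketch, not part of the original source: file names, binding-site
# residue lists and the working dir are illustrative assumptions (the exact
# binding-site format is defined by writeBindingSite, which is not shown here).
def _example_excutePatchDock():  # hypothetical helper
    models = excutePatchDock("1A2K_l_u.pdb", "1A2K_r_u.pdb",
                             lBindingSite=["A_12", "A_15"], rBindingSite=["B_40"],
                             patchDockWdir="/tmp/patchdock_1A2K", cleanWorkingDir=True)
    for i, score, rots, trans in models:
        print("model %d scored %.2f" % (i + 1, score))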
import os, sys
import pandas as pd
from Config import Configuration
from subprocess import Popen, PIPE, check_call
from ast import literal_eval as make_tuple
from Bio import pairwise2
from Bio.SubsMat import MatrixInfo as matlist

cd_hit_path = "/home/rsanchez/Tesis/rriPredMethod/dependencies/bioinformaticTools/cdhit-master/cd-hit"
computedFeatsRootDir = Configuration().computedFeatsRootDir
sequencesPath_root = os.path.join(computedFeatsRootDir, "seqStep", "extractedSeqs")
sequencesPath = os.path.join(sequencesPath_root, "seqsData")
resIdsMap = os.path.join(sequencesPath_root, "seqToStructMap")
cMapPath = os.path.join(computedFeatsRootDir, "common", "contactMaps")
newCmPath = os.path.join(computedFeatsRootDir, "common", "contactMapsBinding")
aligsFastaName = "/tmp/aligsFastaName.fa"
cdhit_out = "/tmp/cdhit.out"
scoreMat = matlist.blosum62


def computeCD_hit():
    # Concatenate all fasta files into a single input file and cluster it with cd-hit
    cmd = "awk 1 %s/*.fasta > %s" % (sequencesPath, aligsFastaName)
    print(cmd)
    check_call(cmd, shell=True)
    proc = Popen([cd_hit_path, "-i", aligsFastaName, "-o", cdhit_out])
    outCdhit = proc.communicate()
def __init__(self):
    self.name = "DisplayStockApi"
    self.display_data = display_data()
    self.Company = Configuration().GetData()['CompanyList']
def __init__(self):
    self.name = "predictionApi"
    self.lstm = lstm()
    self.Company = Configuration().GetData()['CompanyList']
def main():
    cfg = Configuration()
    parameters = {
        "seed": cfg.seed,
        "number_of_epochs": cfg.number_of_epochs,
        "number_of_classes": cfg.number_of_classes,
        "number_of_iterations": cfg.number_of_iterations,
        "momentum": cfg.momentum,
        "decay": cfg.decay,
        "learning_rate": cfg.learning_rate,
        "learning_steps": cfg.learning_rate_steps,
        "device": cfg.device,
        "dataset_dir": cfg.dataset_dir,
        "publishing_losses_frequency": cfg.publishing_losses_frequency,
        "checkpoint_path": cfg.ckpt_path,
        "learning_rate_lambda": cfg.learning_rate_lambda,
        "model_path": cfg.model_path,
        "iterations_to_warmup": cfg.iterations_to_warmup,
        "result_path": cfg.result_path
    }
    device = torch.device(parameters['device'])
    best_model_by_maskF = 0

    train_set = algorithm.COCODataset(parameters['dataset_dir'], "Train", train=True)
    indices = torch.randperm(len(train_set)).tolist()
    train_set = torch.utils.data.Subset(train_set, indices)
    val_set = algorithm.COCODataset(parameters['dataset_dir'], "Validation", train=True)

    model = algorithm.resnet50_for_mask_rcnn(True, parameters['number_of_classes']).to(device)
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=parameters['learning_rate'],
                                momentum=parameters['momentum'],
                                weight_decay=parameters['decay'])
    decrease = lambda x: parameters['learning_rate_lambda'] ** bisect.bisect(
        parameters['learning_steps'], x)

    # Resume from the newest checkpoint, if any
    starting_epoch = 0
    prefix, ext = os.path.splitext(parameters['checkpoint_path'])
    checkpoints = glob.glob(prefix + "-*" + ext)
    checkpoints.sort(key=lambda x: int(re.search(r"-(\d+){}".format(ext),
                                                 os.path.split(x)[1]).group(1)))
    if checkpoints:
        checkpoint = torch.load(checkpoints[-1], map_location=device)
        model.load_state_dict(checkpoint["model"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        starting_epoch = checkpoint["epochs"]
        del checkpoint
        torch.cuda.empty_cache()

    since = time.time()
    print("\nalready trained: {} epochs; to {} epochs".format(starting_epoch,
                                                              parameters['number_of_epochs']))
    for epoch in range(starting_epoch, parameters['number_of_epochs']):
        print("\nepoch: {}".format(epoch + 1))
        training_epoch_time = time.time()
        parameters['learning_epoch'] = decrease(epoch) * parameters['learning_rate']
        algorithm.train_epoch(model, optimizer, train_set, device, epoch, parameters)
        training_epoch_time = time.time() - training_epoch_time
        print('training_epoch_time: ', training_epoch_time)

        validation_epoch_time = time.time()
        eval_output = algorithm.evaluate(model, val_set, device, parameters)
        validation_epoch_time = time.time() - validation_epoch_time
        print('validation_epoch_time: ', validation_epoch_time)

        trained_epoch = epoch + 1
        maskAP = eval_output.get_AP()
        maskAR = eval_output.get_AR()
        maskF = eval_output.get_AF()
        print('AP: ', maskAP)
        print('AR: ', maskAR)
        print('F1: ', maskF)

        # Keep the best model by mask F-score, plus only the 3 newest checkpoints
        if maskF['mask FScore'] > best_model_by_maskF:
            best_model_by_maskF = maskF['mask FScore']
            algorithm.save_best(model, optimizer, trained_epoch, parameters['model_path'],
                                eval_info=str(eval_output))
        algorithm.save_checkpoint(model, optimizer, trained_epoch,
                                  parameters['checkpoint_path'], eval_info=str(eval_output))
        prefix, ext = os.path.splitext(parameters['checkpoint_path'])
        checkpoints = glob.glob(prefix + "-*" + ext)
        checkpoints.sort(key=lambda x: int(re.search(r"-(\d+){}".format(ext),
                                                     os.path.split(x)[1]).group(1)))
        n = 3
        if len(checkpoints) > n:
            for i in range(len(checkpoints) - n):
                os.remove("{}".format(checkpoints[i]))

    total_training_time = time.time() - since
    print('Total time: ', total_training_time)
def __init__(self):
    self.name = "DataFeed"
    self.Company = Configuration().GetData()['CompanyList']
def computeFeaturesAllPdbsOneDir(pdbsIndir=None, computedFeatsRootDir=None,
                                 methodProtocol="struct", isHomeSet=False, ncpu=2):
    '''
    Computes all features needed for complex codification for all complexes in pdbsIndir. Used for training.
    :param pdbsIndir: str. Path to the directory where pdb files are located. Files must be named as follows:
                        path/to/pdbsIndir/
                          prefix1_[chainType]_u.pdb or .pdb.gz
                          prefix2_[chainType]_u.pdb or .pdb
                      By default, it uses as pdbsIndir Config.py DEFAULT_PARAMETERS["pdbsIndir"]
    :param computedFeatsRootDir: str. Path where feature files will be saved. By default,
                      Config.py DEFAULT_PARAMETERS["computedFeatsRootDir"] will be used as out_path
    :param methodProtocol: str. "seq" if just sequential features will be used; "struct" if sequential and
                      structural features will be used. "mixed" behaves as "struct"
    :param isHomeSet: bool. True if a homo-dataset is to be used, False for a hetero dataset or unknown
    :param ncpu: int. Number of cpus to use. If -1, all cpus will be used to parallelize at complex level
    '''
    assert methodProtocol in ["seq", "struct", "mixed"], \
        "Error: methodProtocol in computeFeaturesAllPdbsOneDir must be 'seq', 'struct' or 'mixed' -> " + \
        str(methodProtocol)
    if pdbsIndir is None or computedFeatsRootDir is None:  # Default parameters
        conf = Configuration()
        pdbsIndir = pdbsIndir if pdbsIndir else conf.pdbsIndir
        computedFeatsRootDir = computedFeatsRootDir if computedFeatsRootDir else conf.computedFeatsRootDir
    allCodifiedFname = os.path.join(computedFeatsRootDir, "allFeaturesComputed.txt")
    if os.path.exists(allCodifiedFname):
        with open(allCodifiedFname) as f:
            print(f.read())
        return
    if ncpu < 1:
        ncpu = cpu_count()
    pdbsIndir = os.path.expanduser(pdbsIndir)
    computedFeatsRootDir = os.path.expanduser(computedFeatsRootDir)
    fnames = OrderedDict({})
    fnamesOther = OrderedDict({})
    for fname in sorted(os.listdir(pdbsIndir)):
        if fname.endswith("_u.pdb.gz") or fname.endswith("_u.pdb"):  # unbound structures
            prefix, chainType = splitExtendedPrefix(getExtendedPrefix(fname, splitTag="_u."))
            if prefix not in fnames:
                fnames[prefix] = [None, None]
            if chainType == "r":
                fnames[prefix][1] = os.path.join(pdbsIndir, fname)
            elif chainType == "l":
                fnames[prefix][0] = os.path.join(pdbsIndir, fname)
            else:
                raise FeatureComputerException("Error in filename %s" % fname)
        elif fname.endswith("_b.pdb.gz") or fname.endswith("_b.pdb"):  # bound structures
            prefix, chainType = splitExtendedPrefix(getExtendedPrefix(fname, splitTag="_b."))
            if prefix not in fnamesOther:
                fnamesOther[prefix] = [None, None]
            if chainType == "r":
                fnamesOther[prefix][1] = os.path.join(pdbsIndir, fname)
            elif chainType == "l":
                fnamesOther[prefix][0] = os.path.join(pdbsIndir, fname)
            else:
                raise FeatureComputerException("Error in filename %s" % fname)
    boundAvailable = len(fnames) == len(fnamesOther)
    if len(fnames) < 1:
        raise ValueError("There are no files to be processed")
    for prefix in fnames:  # check for errors
        if sum([1 for elem in fnames[prefix] if elem is None]) > 0:
            print(fnames[prefix])
            raise ValueError("There must be just 2 pdb files for each complex to be processed")
        if boundAvailable:
            if sum([1 for elem in fnamesOther[prefix] if elem is None]) > 0:
                raise ValueError("There is no bound structure for some of your pdb files")
    print("Bound available:", boundAvailable)
    Parallel(n_jobs=ncpu, backend="multiprocessing", batch_size=2)(
        delayed(launchComputeFeaturesOneComplex)(
            fnames[prefix], prefix, computedFeatsRootDir=computedFeatsRootDir,
            boundAvailable=boundAvailable, methodProtocol=methodProtocol,
            checkIfLRHomo=isHomeSet)
        for prefix in sorted(fnames))
    with open(allCodifiedFname, "w") as f:
        f.write("All features computed for: %d" % len(fnames))
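# Hedged usage sketch, not part of the original source (illustrative paths):
# computes sequential and structural features for every *_u.pdb[.gz] complex in a
# directory, parallelizing over all available cpus.
def _example_computeFeaturesAllPdbsOneDir():  # hypothetical helper
    computeFeaturesAllPdbsOneDir(pdbsIndir="~/pdbs",
                                 computedFeatsRootDir="~/computedFeatures",
                                 methodProtocol="struct", ncpu=-1)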
from Config import Configuration
from selenium import webdriver

config_instance = Configuration()
export_dir = config_instance.EXPORT_DIR

# Configure Chrome to download files into the configured export directory
options = webdriver.ChromeOptions()
prefs = {'download.default_directory': str(export_dir)}
options.add_experimental_option('prefs', prefs)

# Create the driver and open Chrome
driver = webdriver.Chrome('C:/Users/blahova.m/Downloads/chromedriver_win32/chromedriver.exe',
                          chrome_options=options)

# Open the EMS web page
driver.get('http://www.emsbrno.cz/p.axd/en/Products.html')

# Find the login link, click it, and continue to the login page
driver.find_element_by_link_text('Log in').click()

# Select the username text box element
username_box = driver.find_element_by_xpath('//*[@id="layout_ctl00_ctl00_content"]/table/tbody/tr[1]/td[2]/input')
username_box.send_keys(config_instance.USER_NAME)

# Select the password text box element
password_box = driver.find_element_by_xpath('//*[@id="layout_ctl00_ctl00_content"]/table/tbody/tr[2]/td[2]/input')
password_box.send_keys(config_instance.PASSWORD)

# Select the login button and click
driver.find_element_by_xpath('//*[@id="layout_ctl00_ctl00_content"]/table/tbody/tr[4]/td[2]/input').click()

for page_url in config_instance.LINKS_ARRAY:
    driver.get(page_url)
def trainAndTestOneFold(trainData, testPrefixes, trainSubsetN, testPath, outputPath,
                        verbose=False, ncpu=1):
    '''
    Trains and tests one fold.
    :param trainData: np.array. Training data whose first column contains the labels and whose
                      remaining columns are features
    :param testPrefixes: str[]. A list that contains the prefixes of all complexes to be tested
    :param trainSubsetN: int tuple. The numerical ids of the training split
    :param testPath: str. Path to a dir where testing data files are stored
    :param outputPath: str. Path to a dir where predictions will be stored. None if results are not to be saved
    :param verbose: boolean. Whether or not to print info to stdout
    :param ncpu: int. Number of cpus to use in parallel
    '''
    testPrefixesNotEvaluated = []
    originalTestPrefixToNewPrefix, __ = getOriginalToActualPrefixs(testPrefixes)
    alreadyComputedPrefixes_and_outnames = []
    for testPrefix in originalTestPrefixToNewPrefix:
        if outputPath is not None:
            outName = getResultsOutname(outputPath, testPrefix, trainSubsetN)
            if verbose and os.path.isfile(outName):
                print("Complex already computed: %s" % (outName))
                alreadyComputedPrefixes_and_outnames.append((testPrefix, outName))
            else:
                testPrefixesNotEvaluated.append((testPrefix, outName))
        else:
            testPrefixesNotEvaluated.append((testPrefix, None))

    modelo = None
    from Config import Configuration
    conf = Configuration()
    # .encode() keeps the md5 call working in both Python 2 and Python 3
    modelFname = os.path.join(conf.tmp,
                              hashlib.md5("".join(sorted(testPrefixes)).encode()).hexdigest() +
                              str(trainSubsetN) + "bipspi2.pckl")
    resultsForEvaluation_list = []
    if len(testPrefixesNotEvaluated) > 0 or len(testPrefixes) == 0:
        if verbose:
            print("Testing:", [x[0] for x in testPrefixesNotEvaluated])
            verboseLevel = 1
        else:
            verboseLevel = 0
        if os.path.exists(modelFname):
            print("Loading classifier")
            modelo = joblib_load(modelFname)
        else:
            print("Training classifier")
            modelo = trainMethod(trainData[:, 1:], trainData[:, 0],
                                 verboseLevel=verboseLevel, ncpu=ncpu)
            joblib_save(modelo, modelFname)
        del trainData
        gc.collect()
        if verbose:
            print("Classifier fitted.")
        # Choose the number of prediction jobs so the expected per-complex memory fits in free RAM
        expectedSize = estimateRequiredMemoryPerComplex(testPrefixesNotEvaluated, testPath)
        freeMem = checkFreeMemory()
        nJobs = int(max(1, min(ncpu, freeMem / expectedSize, len(testPrefixesNotEvaluated))))
        print("Free memory for predictOnePrefix: %s GB. Njobs: %s (%s expected size)" %
              (freeMem, nJobs, expectedSize))
        resultsForEvaluation_list = Parallel(n_jobs=nJobs)(
            delayed(predictOnePrefix)(originalTestPrefixToNewPrefix[testPrefix], modelo,
                                      outName, testPath)
            for testPrefix, outName in testPrefixesNotEvaluated)
        gc.collect()

    expectedSize = estimateRequiredMemoryPerComplex(alreadyComputedPrefixes_and_outnames, testPath)
    freeMem = checkFreeMemory()
    nJobs = int(max(1, min(ncpu, freeMem / expectedSize, len(alreadyComputedPrefixes_and_outnames))))
    resultsForEvaluation_list += Parallel(n_jobs=nJobs)(
        delayed(loadExistingResults)(testPrefix, outName)
        for testPrefix, outName in alreadyComputedPrefixes_and_outnames)

    if len(resultsForEvaluation_list) > 0:
        freeMem = checkFreeMemory()
        totMem = getTotalMemory()
        usedMem = totMem - freeMem
        nJobs = int(max(1, min(ncpu, freeMem / (usedMem / (1 + len(resultsForEvaluation_list))))))
        print("Free memory for evaluateOneResultObj: %s GB. Njobs: %s" % (freeMem, nJobs))
        Parallel(n_jobs=nJobs)(delayed(evaluateOneResultObj)(testPrefix, resultObj, False)
                               for testPrefix, resultObj in resultsForEvaluation_list)
        finalResults = list(zip(*resultsForEvaluation_list))[1]
    else:
        finalResults = []
    del resultsForEvaluation_list
    tryToRemove(modelFname)
    return finalResults, modelo
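# Hedged usage sketch, not part of the original source: trainData shape, prefixes
# and paths are illustrative assumptions. Trains (or reloads) the fold model and
# writes one prediction file per test complex under outputPath.
def _example_trainAndTestOneFold(trainData):  # hypothetical helper
    results, model = trainAndTestOneFold(trainData, testPrefixes=["1A2K"],
                                         trainSubsetN=(0,), testPath="~/codified/test",
                                         outputPath="~/results", verbose=True, ncpu=4)
    return results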
def __init__(self):
    self.name = "DataFeed"
    self.Company = Configuration().GetData()['CompanyList']
    self.CompanyP = Configuration().GetData()['CompanyListP']
    self.APIKEYS = Configuration().GetData()['APIKEYDICT']
from __future__ import print_function
import os
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, matthews_corrcoef
from collections import Counter
from Config import Configuration

pd.set_option('precision', 4)

EVAL_PAIRS_AT = [50, 100, 500]
PSAIA_PATH = os.path.join(Configuration().computedFeatsRootDir, "structStep/PSAIA/procPSAIA")


def computeAUC(testLabels, predictions):
    '''
    Returns the ROC AUC, or 0.5 if it was not possible to compute it.
    @param testLabels: int[]. List of labels (-1 for negative class and 1 for positive class)
    @param predictions: float[]. List of predicted scores
    @return auc_score: float
    '''
    try:
        return roc_auc_score(testLabels, predictions)
    except ValueError:
        return 0.5


def evaluatePairs(prefix, resDf):
    '''
    Computes performance evaluation at pairs level.
import sys, os
from multiprocessing import cpu_count
from Config import Configuration
from .FeaturesComputer import FeaturesComputer
from .common.computeContactMap import ContactMapper
from .common.seqInputPreproceser import SeqInputPreproceser
from .seqStep.getSeqFeatures import SeqFeaturesCalculator
from .structStep.PSAIA.computePSAIA import PSAIAComputer
from .structStep.VORONOI.computeVoronoi import VORONOIComputer
from .structStep.DSSP.computeDssp import DsspComputer
from .structStep.HALF_SPHERE_EXPOS.computeHalfSphere import HalfSphereComputer

# Default parameters
conf = Configuration()
pdbsIndirDefDefault = conf.pdbsIndir
computedFeatsRootDirDefault = conf.computedFeatsRootDir
useCorrMut = conf.useCorrMut

featuresComputers = {
    "mixed": [("ContactMapper", (ContactMapper, {"boundAvailable": True})),
              ("PSAIAComputer", (PSAIAComputer, {})),
              ("VORONOIComputer30", (VORONOIComputer, {"maxDist": 30})),
              ("DsspComputer", (DsspComputer, {})),
              ("HalfSphereComputer", (HalfSphereComputer, {})),
              ("SeqFeaturesCalculator", (SeqFeaturesCalculator, {"useCorrMut": useCorrMut}))],