Example #1
def send_email():
    sender = Configuration().GetData()['EmailID']
    gmail_password = Configuration().GetData()['Password']
    COMMASPACE = ', '
    recipients = ['*****@*****.**']

    # Create the enclosing (outer) message
    outer = MIMEMultipart()
    outer['Subject'] = 'DataFeed @ ' + str(datetime.datetime.now().date())
    outer['To'] = COMMASPACE.join(recipients)
    outer['From'] = sender
    outer.preamble = 'You will not see this in a MIME-aware mail reader.\n'

    msg = MIMEText('Data Feeding Start in Mongodb' + str(datetime.datetime.now()))
    outer.attach(msg)
    composed = outer.as_string()

    # Send the email
    try:
        with smtplib.SMTP('smtp.gmail.com', 587) as s:
            s.ehlo()
            s.starttls()
            s.ehlo()
            s.login(sender, gmail_password)
            s.sendmail(sender, recipients, composed)
            # no explicit close(): the with-block quits the connection on exit
            print("Email sent!")
    except Exception:
        print("Unable to send the email. Error: ", sys.exc_info()[0])
        raise
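
These snippets all read settings through Configuration().GetData(). As a point of reference, here is a hypothetical minimal sketch of the dict that call is assumed to return, built only from the keys the examples actually use (all values are placeholders, not the project's real configuration):

# Hypothetical config shape; the real values come from the project's config file.
config = {
    'EmailID': 'sender@example.com',      # used by send_email / email_logfile
    'Password': 'app-specific-password',
    'PrivateIp': '127.0.0.1',             # MongoDB connection settings
    'MongoPort': 27017,
    'MongoDB': 'stocks',
    'CompanyList': ['AAPL', 'MSFT'],      # symbols iterated by the data-feed examples
}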
Example #2
 def login(self, request):
     try:
         data = json.loads(request.data.decode())
         self.ip = Configuration().GetData()['PrivateIp']
         self.port = Configuration().GetData()['MongoPort']
         self.db = Configuration().GetData()['MongoDB']
         obj = MongoDB()
         obj.ConnectMongo(self.ip, self.port, self.db)
         record = obj.ReadValue("users", data["email"])
         if record is not None:
             record = ast.literal_eval(record['Data'])
             if record['password'] == data["password"]:
                 ret = {
                     'access_token': create_access_token(identity=data["email"]),
                     'refresh_token': create_refresh_token(identity=data["email"]),
                     'status': "True"
                 }
                 return jsonify(ret), 200
             else:
                 return jsonify({"status": "Invalid username or password"}), 401
         else:
             return jsonify({"status": "Invalid username or password"}), 401
     except Exception as e:
         generate_log('auth', str(e), str(request))
         return jsonify({"status": "False"}), 500  # surface the failure instead of implicitly returning None
Example #3
 def get_user(self):
     try:
         email = get_jwt_identity()
         self.ip = Configuration().GetData()['PrivateIp']
         self.port = Configuration().GetData()['MongoPort']
         self.db = Configuration().GetData()['MongoDB']
         obj = MongoDB()
         obj.ConnectMongo(self.ip, self.port, self.db)
         record = obj.ReadValue("users", email)
         record = ast.literal_eval(record['Data'])
         record.pop('password', None)
         return jsonify(record)
     except Exception as e:
         generate_log('get_user', str(e), 'get_user method')
         return jsonify({"status": "False"}), 500  # avoid returning None from the route on error
Example #4
def email_logfile():
    sender = Configuration().GetData()['EmailID']
    gmail_password = Configuration().GetData()['Password']
    dr_tariq_team = Configuration().GetData()['team_emails']
    COMMASPACE = ', '
    recipients = dr_tariq_team

    # Create the enclosing (outer) message
    outer = MIMEMultipart()
    outer['Subject'] = 'dpdmlog @ ' + str(datetime.datetime.now().date())
    outer['To'] = COMMASPACE.join(recipients)
    outer['From'] = sender
    outer.preamble = 'You will not see this in a MIME-aware mail reader.\n'

    # List of attachments
    attachments = ['sp_error.log']

    # Add the attachments to the message
    for file in attachments:
        try:
            with open(file, 'rb') as fp:
                msg = MIMEBase('application', "octet-stream")
                msg.set_payload(fp.read())
            encoders.encode_base64(msg)
            msg.add_header('Content-Disposition',
                           'attachment',
                           filename=os.path.basename(file))
            outer.attach(msg)
        except Exception:
            print("Unable to open one of the attachments. Error: ",
                  sys.exc_info()[0])
            raise

    composed = outer.as_string()

    # Send the email
    try:
        with smtplib.SMTP('smtp.gmail.com', 587) as s:
            s.ehlo()
            s.starttls()
            s.ehlo()
            s.login(sender, gmail_password)
            s.sendmail(sender, recipients, composed)
            # no explicit close(): the with-block quits the connection on exit
        # print("Email sent!")
    except Exception:
        print("Unable to send the email. Error: ", sys.exc_info()[0])
        raise
Example #5
    def setUp(self, is_unit_test):
        self.ExceptionHandler = CustomException.CustomException.getInstance()
        self.Configuration = Configuration()
        self.Validation = Validation
        self.Setup = Setup.Setup()
        self.Setup.setupApp()
        self.logger = Debugger.Debugger.getInstance()
        self.Database = Database.Database.getInstance(is_unit_test)
        self.Database.connect(is_unit_test)
        self.ProjectGUI = ProjectGUI
        self.Setup.createTables()
        self.ProjectRepo = ProjectRepository.ProjectRepository()

        email_configuration = self.Database.getConfiguration()

        try:
            self.Configuration.setEmailConfiguration(email_configuration[0])
        except Exception:
            self.Configuration.setEmailConfiguration(email_configuration)

        self.ProjectsList = {}
        self.queue = {}

        self.loadAllProjects()
Example #6
 def getdata(self):
     app = TestApp("0.0.0.0", 4001, 10)
     for com in Configuration().GetData()['CompanyList']:
         ibcontract = IBcontract()
         ibcontract.secType = "STK"
         ibcontract.lastTradeDateOrContractMonth = "201809"
         ibcontract.symbol = com
         ibcontract.exchange = "SMART"
         resolved_ibcontract = app.resolve_ib_contract(ibcontract)
         dataset1 = {
             0: ['20190502  13:30:00', '20190502  16:00:00'],
             1: [209.95, 208.65],
             2: [212.65, 210.29],
             3: [208.13, 208.41],
             4: [208.63, 209.17],
             5: [149612, 100915]
         }
         durationstr = "3600 S"
         # historic_data = app.get_IB_historical_data(resolved_ibcontract, durationstr, bar)
         df = pd.DataFrame(dataset1)
         df.rename(columns={
             0: "date",
             1: "open",
             2: "high",
             3: "low",
             4: "close",
             5: "volume"
         },
                   inplace=True)
         for bar in barSize:
             dataset = self.strategy(df)
             print(com)
             print(bar)
             # MongoStore().Feed_IntraDay(com, bar, dataset)
             print(df)
Example #7
def data_feed():
    app = TestApp("0.0.0.0", 4001, 9)
    for com in Configuration().GetData()['CompanyList']:
        ibcontract = IBcontract()
        ibcontract.secType = "STK"
        ibcontract.lastTradeDateOrContractMonth = "201809"
        ibcontract.symbol = com
        ibcontract.exchange = "SMART"
        resolved_ibcontract = app.resolve_ib_contract(ibcontract)
        durationstr = "1 D"
        for bar in barSize:
            historic_data = app.get_IB_historical_data(resolved_ibcontract,
                                                       durationstr, bar)
            signal = 1
            dataset = pd.DataFrame(historic_data)
            dataset['signal'] = signal
            for index, row in dataset.iterrows():
                if row['open'] > row['close']:
                    signal = 1
                else:
                    signal = 0
                print(signal)
                dataset.loc[index, 'signal'] = signal  # .loc avoids pandas chained-assignment warnings
                print(com)
            print(bar)
            Feed_IntraDay(com, bar, dataset)
            print(dataset)
Example #8
def loadAccesibility(pdbId, chainType="l", rasaThr=10.0):
  '''
    Loads PSAIA files for a given pdbId and returns a set of accessible
    resIds and a set of non-accessible resIds.

    :param pdbId: str. The identifier of the pdb file
    :param chainType: str. "l" for ligand and "r" for receptor
    :param rasaThr: float. Relative ASA threshold used to decide whether a residue is accessible

    :return (accesibleSet, nonAccesibleSet)
        accesibleSet: set(str). resIds of residues that are accessible according to PSAIA and the threshold
        nonAccesibleSet: set(str). resIds of residues that are non-accessible according to PSAIA and the threshold
  '''
  PSAIA_PATH = os.path.join(Configuration().computedFeatsRootDir, "structStep/PSAIA/procPSAIA")
  accesibleSet = set()
  nonAccesibleSet = set()
  for fname in os.listdir(PSAIA_PATH):
    if fname.startswith(pdbId+"_"+chainType) and fname.endswith(".psaia.tab"):
      with open(os.path.join(PSAIA_PATH, fname)) as f_:
        f_.readline()
        for line in f_:
          lineArray = line.split()
          chainId = lineArray[0]
          resId = lineArray[1]
          res_full_id = chainId + "_" + resId
          if float(lineArray[8]) > rasaThr:
            accesibleSet.add(res_full_id)
          else:
            nonAccesibleSet.add(res_full_id)
  return accesibleSet, nonAccesibleSet
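
A minimal usage sketch, assuming PSAIA output files for the hypothetical pdbId "1A2K" already exist under computedFeatsRootDir:

# "1A2K" is illustrative; any id with matching *.psaia.tab files works.
accessibleIds, buriedIds = loadAccesibility("1A2K", chainType="l", rasaThr=10.0)
print("%d accessible vs %d buried residues" % (len(accessibleIds), len(buriedIds)))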
Example #9
 def __init__(self):
     self.name = "user"
     data = Configuration().GetData()  # read the configuration once instead of re-parsing it per key
     self.ip = data['PrivateIp']
     self.port = data['MongoPort']
     self.db = data['MongoDB']
     self.email = data['EmailID']
     self.password = data['Password']
     self.reg_emailLink = data['RegisterEmail']
     self.forgot_passwordLink = data['ForgotPassword']
     self.email_ids = data['Email']
     self.sp_link = data['SP']
     self.mongoObj = MongoDB()
     self.mongoObj.ConnectMongo(self.ip, self.port, self.db)
Example #10
    def computeFeaturesAllComplexes(OneFeaturesComputerClass,
                                    pdbsIndir,
                                    computedFeatsRootDir,
                                    classArgs=None,
                                    ncpu=1):
        '''
      Computes one type of feature over all complexes that are inside pdbsIndir.

      @param OneFeaturesComputerClass: FeaturesComputer. Class used to compute one kind of feature
      @param pdbsIndir: str. Path where the pdb files to be computed are located. There must be 2 pdb files per
                             complex; the infixes _l_ and _r_ distinguish them, e.g. "1A2K_l_u.pdb" and
                             "1A2K_r_u.pdb". pdb files ending with "b.pdb" will be skipped.
      @param computedFeatsRootDir: str. Path where features will be stored
      @param classArgs: Dict. The arguments that will be passed to OneFeaturesComputerClass()
      @param ncpu: the number of subprocesses to use in parallel (parallelism at complex level)
    '''

        if classArgs is None:  # avoid the mutable-default-argument pitfall
            classArgs = {}
        ConfigObject = Configuration()
        if pdbsIndir is None:
            pdbsIndir = ConfigObject.pdbsIndir
        if computedFeatsRootDir is None:
            computedFeatsRootDir = ConfigObject.computedFeatsRootDir
        # expand user paths only after the None fallbacks have been applied
        pdbsIndir = os.path.expanduser(pdbsIndir)
        computedFeatsRootDir = os.path.expanduser(computedFeatsRootDir)

        fnames = {}
        for fname in sorted(os.listdir(pdbsIndir)):
            if not fname.endswith(".pdb"): continue  # skip non-pdb files
            if not fname.endswith("b.pdb"):
                prefix = fname.split("_")[0]
                if "_r_" in fname or "_l_" in fname:
                    if prefix not in fnames:
                        fnames[prefix] = [None, None]
                    if "_r_" in fname:
                        fnames[prefix][0] = os.path.join(pdbsIndir, fname)
                    if "_l_" in fname:
                        fnames[prefix][1] = os.path.join(pdbsIndir, fname)
                else:
                    fnames[prefix].append(os.path.join(pdbsIndir, fname))

        if len(fnames) < 1:
            raise ValueError("There are no files to be processed")
        for prefix in fnames:  # check for errors
            if len(fnames[prefix]) > 2 or sum(
                [1 for elem in fnames[prefix] if elem is None]):
                raise ValueError(
                    "There must be just 2 pdb files for each complex to be predicted"
                )
            else:
                fnames[prefix] = tuple(fnames[prefix])

        Parallel(n_jobs=ncpu, backend="multiprocessing",
                 batch_size=2)(delayed(computeFunction)(
                     OneFeaturesComputerClass, fnames[prefix][0],
                     fnames[prefix][1], computedFeatsRootDir, classArgs)
                               for prefix in sorted(fnames))
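
A hedged invocation sketch: since the signature takes no self, the method is assumed here to be a staticmethod of a FeaturesComputer-style class, and DsspComputer (imported elsewhere in this project) is used as the example feature computer; both paths are placeholders:

FeaturesComputer.computeFeaturesAllComplexes(DsspComputer,
                                             pdbsIndir="~/pdbs",
                                             computedFeatsRootDir="~/computedFeatures",
                                             ncpu=4)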
Example #11
    def get_history(self):
        try:
            email = get_jwt_identity()
            self.ip = Configuration().GetData()['PrivateIp']
            self.port = Configuration().GetData()['MongoPort']
            self.db = Configuration().GetData()['MongoDB']
            obj = MongoDB()
            obj.ConnectMongo(self.ip, self.port, self.db)
            record = obj.ReadValue("history", email)
            if record is not None:
                record = ast.literal_eval(record["Data"])
                toreturn = {"status": "True", "record": record}
            else:
                toreturn = {"status": "False"}
            return jsonify(toreturn)
        except Exception as e:
            generate_log('get_history', str(e))
            return jsonify({"status": "False"}), 500  # avoid returning None from the route on error
Example #12
    def checkIfAllCodified(self, inputPdbs=None):

        if inputPdbs is None:
            # Default parameters
            conf = Configuration()
            inputPdbs = conf.pdbsIndir

        check0 = self.checkIfAllCmapsComputed(inputPdbs)
        check1 = self.checkIfAllCodifiedWorker(self.trainingDataPath)
        check2 = self.checkIfAllCodifiedWorker(self.testingDataPath)

        return check0 and check1 and check2
Example #13
 def set_default_params(self):
     cfg = Configuration()
     self.learning_rate_value.setText(str(cfg.learning_rate))
     self.learning_steps_value.setText("200 400")
     self.decay_value.setText(str(cfg.decay))
     self.momentum_value.setText(str(cfg.momentum))
     self.epoch_value.setText(str(cfg.number_of_epochs))
     self.iterations_value.setText(str(cfg.number_of_iterations))
     self.path_content.setText('Dataset')
     self.dir_path = 'Dataset'
     self.class_value.setText(str(cfg.number_of_classes))
     self.seed_value.setText(str(cfg.seed))
     self.warm_up_value.setText(str(cfg.iterations_to_warmup))
Example #14
def data_feed():
    app = TestApp("0.0.0.0", 4001, 10)
    for com in Configuration().GetData()['CompanyList']:
        ibcontract = IBcontract()
        ibcontract.secType = "STK"
        ibcontract.lastTradeDateOrContractMonth = "201809"
        ibcontract.symbol = com
        ibcontract.exchange = "SMART"
        resolved_ibcontract = app.resolve_ib_contract(ibcontract)
        bar = '1 sec'
        durationstr = "120 sec"
        historic_data = app.get_IB_historical_data(resolved_ibcontract,
                                                   durationstr, bar)
        print(com)
        print(bar)
        Feed_IntraDay(com, bar, historic_data)
        print(historic_data)
Example #15
 def __init__(self, dataRootPath=None, featuresToInclude=None):
     if dataRootPath is None:
         dataRootPath = Configuration().computedFeatsRootDir
     if featuresToInclude is None:
         featuresToInclude = FEATURES_TO_INCLUDE
     else:
         featNames = list(zip(*featuresToInclude))[0]  # zip() must be materialized to be indexed in Python 3
         if "psaia" not in featNames:
             featuresToInclude.insert(0,
                                      ("psaia",
                                       ("structStep/PSAIA/procPSAIA", [8], {
                                           "total_RASA": 8
                                       })))
     self.psaiaIndex = list(zip(*featuresToInclude))[0].index("psaia")
     DataLoader.__init__(self, dataRootPath, featuresToInclude)
     self.rasaLDict = {}
     self.rasaRDict = {}
Example #16
def moveAndWriteAsPDBIfMmcif(fnameIn, fnameOut, removeInput=False):
    from Config import Configuration
    conf = Configuration()
    minNumResidues, maxNumResidues = conf.minNumResiduesPartner, conf.maxNumResiduesPartner
    try:
        struct, __ = loadPdbIfIsPath(fnameIn)
        totalNumRes = 0
        for chain in struct[0]:
            nResInChain = len(chain.get_list())
            totalNumRes += nResInChain
        if not (minNumResidues < totalNumRes < maxNumResidues):
            raise BadNumberOfResidues(totalNumRes)
        else:
            writter = PDBIO()
            writter.set_structure(struct)
            writter.save(fnameOut)
            if removeInput: os.remove(fnameIn)
            return True
    except Exception as e:
        print("Error in moveAndWriteAsPDBIfMmcif !!!", e)
        return False
Example #17
def excutePatchDock(lPdbFname,
                    rPdbFname,
                    lBindingSite,
                    rBindingSite,
                    patchDockWdir,
                    writeOnlyLigand=False,
                    cleanWorkingDir=False):
    conf = Configuration()
    patchDockRootDir = conf.patchDockRootDir
    lPdbFname = uncompressPdbIfGz(lPdbFname)
    rPdbFname = uncompressPdbIfGz(rPdbFname)

    configStr = patchDockTemplate % {
        "lPdbFname": lPdbFname,
        "rPdbFname": rPdbFname,
        "patchDockRootDir": patchDockRootDir,
        "patchDockWdir": patchDockWdir
    }
    configFname = "%(patchDockWdir)s/config.txt" % {
        "patchDockWdir": patchDockWdir
    }
    resultsFname = "%(patchDockWdir)s/results.patchdock" % {
        "patchDockWdir": patchDockWdir
    }
    myMakeDir(patchDockWdir)
    with open(configFname, "w") as f:
        f.write(configStr)

    writeBindingSite(lBindingSite, patchDockWdir, chainType="l")
    writeBindingSite(rBindingSite, patchDockWdir, chainType="r")
    cmd = [
        os.path.join(patchDockRootDir, "patch_dock.Linux"), configFname,
        resultsFname
    ]
    proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=patchDockWdir,
                 universal_newlines=True)  # str output, so the "error" substring checks work in Python 3
    print(" ".join(cmd))
    output = proc.communicate()
    print(output[0])
    print(
        "\n?????????????????????????????????????????????????????????????????????\n"
    )
    print(output[1])
    if "error" in output[1]:
        raise Exception("Error executing patchDock")

    listOfSelectedModels = []
    with open(resultsFname) as f:
        for line in f:
            if "# | score | pen." in line:
                break
        for i, line in enumerate(f):
            if i >= N_MODELS_TO_EXTRACT:
                break
            lineArray = line.split("|")
            if len(lineArray) > 1:  # only well-formed result rows carry a score and transformations
                score = float(lineArray[1].strip())
                transformations = lineArray[-1].split()
                rots = [float(elem) for elem in transformations[:3]]
                trans = [float(elem) for elem in transformations[3:]]
                listOfSelectedModels.append((i, score, rots, trans))

    for i, score, rots, trans in listOfSelectedModels:
        rotX, rotY, rotZ = rots
        transX, transY, transZ = trans
        fnameOut = os.path.join(patchDockWdir,
                                "results.patchdock.%d.pdb" % (i + 1))
        rotateTranslatePdb(lPdbFname,
                           rotX,
                           rotY,
                           rotZ,
                           transX,
                           transY,
                           transZ,
                           fnameOut=fnameOut)
        if not writeOnlyLigand:
            cmd = "cat %(rPdbFname)s %(fnameOut)s > %(fnameOut)s.tmp && mv %(fnameOut)s.tmp %(fnameOut)s " % locals(
            )
            proc = Popen(cmd,
                         stdin=PIPE,
                         stdout=PIPE,
                         stderr=PIPE,
                         shell=True,
                         cwd=patchDockWdir,
                         universal_newlines=True)
            output = proc.communicate()
            print(output[0])
            print(output[1])
            if "error" in output[1]:
                raise Exception(
                    "Error concatenating ligand and receptor pdb files")

    if cleanWorkingDir:
        cleanDirectory(lPdbFname, rPdbFname, patchDockWdir)
    print("patchDock DONE")
    return listOfSelectedModels
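
A hedged call sketch for excutePatchDock; the pdb file names, the binding-site objects and the working directory are placeholders:

lSite = rSite = None  # placeholders: supply real binding-site objects as expected by writeBindingSite
models = excutePatchDock("complex_l_u.pdb", "complex_r_u.pdb",
                         lSite, rSite,
                         patchDockWdir="/tmp/patchdock_run",
                         writeOnlyLigand=False,
                         cleanWorkingDir=True)
for i, score, rots, trans in models:
    print(i, score)  # docking pose index and PatchDock score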
Example #18
import os, sys
import pandas as pd
from Config import Configuration
from subprocess import Popen, PIPE, check_call
from ast import literal_eval as make_tuple

from Bio import pairwise2
from Bio.SubsMat import MatrixInfo as matlist

cd_hit_path = "/home/rsanchez/Tesis/rriPredMethod/dependencies/bioinformaticTools/cdhit-master/cd-hit"

computedFeatsRootDir = Configuration().computedFeatsRootDir
sequencesPath_root = os.path.join(computedFeatsRootDir, "seqStep",
                                  "extractedSeqs")
sequencesPath = os.path.join(sequencesPath_root, "seqsData")
resIdsMap = os.path.join(sequencesPath_root, "seqToStructMap")
cMapPath = os.path.join(computedFeatsRootDir, "common", "contactMaps")
newCmPath = os.path.join(computedFeatsRootDir, "common", "contactMapsBinding")

aligsFastaName = "/tmp/aligsFastaName.fa"
cdhit_out = "/tmp/cdhit.out"

scoreMat = matlist.blosum62


def computeCD_hit():
    cmd = "awk 1 %s/*.fasta  > %s" % (sequencesPath, aligsFastaName)
    print(cmd)
    check_call(cmd, shell=True)
    proc = Popen([cd_hit_path, "-i", aligsFastaName, "-o", cdhit_out])
    outCdhit = proc.communicate()
Example #19
 def __init__(self):
     self.name = "DisplayStockApi"
     self.display_data = display_data()
     self.Company = Configuration().GetData()['CompanyList']
Example #20
 def __init__(self):
     self.name = "predictionApi"
     self.lstm = lstm()
     self.Company = Configuration().GetData()['CompanyList']
Example #21
def main():

    cfg = Configuration()
    parameters = {
        "seed": cfg.seed,
        "number_of_epochs": cfg.number_of_epochs,
        "number_of_classes": cfg.number_of_classes,
        "number_of_iterations": cfg.number_of_iterations,
        "momentum": cfg.momentum,
        "decay": cfg.decay,
        "learning_rate": cfg.learning_rate,
        "learning_steps": cfg.learning_rate_steps,
        "device": cfg.device,
        "dataset_dir": cfg.dataset_dir,
        "publishing_losses_frequency": cfg.publishing_losses_frequency,
        "checkpoint_path": cfg.ckpt_path,
        "learning_rate_lambda": cfg.learning_rate_lambda,
        "model_path": cfg.model_path,
        "iterations_to_warmup": cfg.iterations_to_warmup,
        "result_path": cfg.result_path
    }

    device = torch.device(parameters['device'])
    best_model_by_maskF = 0

    train_set = algorithm.COCODataset(parameters['dataset_dir'], "Train", train=True)
    indices = torch.randperm(len(train_set)).tolist()
    train_set = torch.utils.data.Subset(train_set, indices)
    val_set = algorithm.COCODataset(parameters['dataset_dir'], "Validation", train=True)
    model = algorithm.resnet50_for_mask_rcnn(True, parameters['number_of_classes']).to(device)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(
        params, lr=parameters['learning_rate'],
        momentum=parameters['momentum'],
        weight_decay=parameters['decay'])

    decrease = lambda x: parameters['learning_rate_lambda'] ** bisect.bisect(
        parameters['learning_steps'], x)

    starting_epoch = 0
    prefix, ext = os.path.splitext(parameters['checkpoint_path'])
    checkpoints = glob.glob(prefix + "-*" + ext)
    checkpoints.sort(key=lambda x: int(re.search(r"-(\d+){}".format(ext), os.path.split(x)[1]).group(1)))
    if checkpoints:
        checkpoint = torch.load(checkpoints[-1], map_location=device)
        model.load_state_dict(checkpoint["model"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        starting_epoch = checkpoint["epochs"]
        del checkpoint
        torch.cuda.empty_cache()

    since = time.time()
    print("\nalready trained: {} epochs; to {} epochs".format(starting_epoch, parameters['number_of_epochs']))

    for epoch in range(starting_epoch, parameters['number_of_epochs']):
        print("\nepoch: {}".format(epoch + 1))

        training_epoch_time = time.time()
        parameters['learning_epoch'] = decrease(epoch) * parameters['learning_rate']

        algorithm.train_epoch(model, optimizer, train_set, device, epoch, parameters)
        training_epoch_time = time.time() - training_epoch_time
        print('training_epoch_time: ', training_epoch_time)

        validation_epoch_time = time.time()
        eval_output = algorithm.evaluate(model, val_set, device, parameters)
        validation_epoch_time = time.time() - validation_epoch_time
        print('validation_epoch_time: ', validation_epoch_time)

        trained_epoch = epoch + 1
        maskAP = eval_output.get_AP()
        maskAR = eval_output.get_AR()
        maskF = eval_output.get_AF()
        print('AP: ', maskAP)
        print('AR: ', maskAR)
        print('F1: ', maskF)
        if maskF['mask FScore'] > best_model_by_maskF:
            best_model_by_maskF = maskF['mask FScore']
            algorithm.save_best(model, optimizer, trained_epoch,
                                parameters['model_path'], eval_info=str(eval_output))

        algorithm.save_checkpoint(model, optimizer, trained_epoch,
                                  parameters['checkpoint_path'], eval_info=str(eval_output))

        prefix, ext = os.path.splitext(parameters['checkpoint_path'])
        checkpoints = glob.glob(prefix + "-*" + ext)
        checkpoints.sort(key=lambda x: int(re.search(r"-(\d+){}".format(ext), os.path.split(x)[1]).group(1)))
        n = 3
        if len(checkpoints) > n:
            for i in range(len(checkpoints) - n):
                os.remove("{}".format(checkpoints[i]))

    total_training_time = time.time() - since
    print('Total time: ', total_training_time)
Example #22
 def __init__(self):
     self.name = "DataFeed"
     self.Company = Configuration().GetData()['CompanyList']
Example #23
def computeFeaturesAllPdbsOneDir(pdbsIndir=None,
                                 computedFeatsRootDir=None,
                                 methodProtocol="struct",
                                 isHomeSet=False,
                                 ncpu=2):
    '''
    Computes all features needed for complex codification for all complexes in pdbsIndir. Used for training.
    :param pdbsIndir: str. Path to the directory where pdb files are located. They must be named as follows:
                    path/to/pdbsIndir/
                                      prefix1_[chainType]_u.pdb or .pdb.gz
                                      prefix2_[chainType]_u.pdb or .pdb
                            By default, Config.py DEFAULT_PARAMETERS["pdbsIndir"] is used as pdbsIndir
    :param computedFeatsRootDir: str. Path where feature files will be saved. By default,
                                Config.py DEFAULT_PARAMETERS["computedFeatsRootDir"] is used as the out path
    :param methodProtocol: str. "seq" if just sequential features will be used; "struct" if sequential and
                                structural features will be used. "mixed" behaves as "struct"
    :param isHomeSet: True if a homo-dataset is to be used, False for a hetero dataset or unknown
    :param ncpu: int. Number of cpus to use. If -1, all cpus will be used to parallelize at complex level
  '''
    assert methodProtocol in ["seq", "struct", "mixed"], "Error methodProtocol in computeFeaturesAllPdbsOneDir must " + \
                                                     "be 'seq' or 'struct' or 'mixed'->"+str(methodProtocol)

    if pdbsIndir is None or computedFeatsRootDir is None:
        # Default parameters
        conf = Configuration()
        pdbsIndir = pdbsIndir if pdbsIndir else conf.pdbsIndir
        computedFeatsRootDir = computedFeatsRootDir if computedFeatsRootDir else conf.computedFeatsRootDir

    allCodifiedFname = os.path.join(computedFeatsRootDir,
                                    "allFeaturesComputed.txt")
    if os.path.exists(allCodifiedFname):
        with open(allCodifiedFname) as f:
            print(f.read())
        return

    if ncpu < 1:
        ncpu = cpu_count()

    # the None fallbacks were already applied above; expand the user paths
    pdbsIndir = os.path.expanduser(pdbsIndir)
    computedFeatsRootDir = os.path.expanduser(computedFeatsRootDir)
    fnames = OrderedDict({})
    fnamesOther = OrderedDict({})
    for fname in sorted(os.listdir(pdbsIndir)):
        if fname.endswith("_u.pdb.gz") or fname.endswith(
                "_u.pdb"):  # unbound structures
            prefix, chainType = splitExtendedPrefix(
                getExtendedPrefix(fname, splitTag="_u."))
            if prefix not in fnames:
                fnames[prefix] = [None, None]
            if chainType == "r":
                fnames[prefix][1] = os.path.join(pdbsIndir, fname)
            elif chainType == "l":
                fnames[prefix][0] = os.path.join(pdbsIndir, fname)
            else:
                raise FeatureComputerException("Error in filename %s" % fname)
        elif fname.endswith("_b.pdb.gz") or fname.endswith("_b.pdb"):
            prefix, chainType = splitExtendedPrefix(
                getExtendedPrefix(fname, splitTag="_b."))
            if prefix not in fnamesOther:
                fnamesOther[prefix] = [None, None]
            if chainType == "r":
                fnamesOther[prefix][1] = os.path.join(pdbsIndir, fname)
            elif chainType == "l":
                fnamesOther[prefix][0] = os.path.join(pdbsIndir, fname)
            else:
                raise FeatureComputerException("Error in filename %s" % fname)

    boundAvailable = len(fnames) == len(fnamesOther)

    if len(fnames) < 1: raise ValueError("There are no files to be processed")
    for prefix in fnames:  # check for errors
        if sum([1 for elem in fnames[prefix] if elem is None]) > 0:
            print(fnames[prefix])
            raise ValueError(
                "There must be just 2 pdb files for each complex to be processed"
            )
        if boundAvailable:
            if sum([1 for elem in fnamesOther[prefix] if elem is None]) > 0:
                raise ValueError(
                    "There is no bound structure for some of your pdb files")

    print("Bound available:", boundAvailable)
    Parallel(n_jobs=ncpu, backend="multiprocessing",
             batch_size=2)(delayed(launchComputeFeaturesOneComplex)(
                 fnames[prefix],
                 prefix,
                 computedFeatsRootDir=computedFeatsRootDir,
                 boundAvailable=boundAvailable,
                 methodProtocol=methodProtocol,
                 checkIfLRHomo=isHomeSet) for prefix in sorted(fnames))

    with open(allCodifiedFname, "w") as f:
        f.write("All features computed for: %d" % len(fnames))
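
A minimal invocation sketch (paths are placeholders; omitted arguments fall back to the Config.py defaults mentioned in the docstring):

computeFeaturesAllPdbsOneDir(pdbsIndir="~/pdbs",
                             computedFeatsRootDir="~/computedFeatures",
                             methodProtocol="struct",
                             ncpu=4)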
Example #24
from Config import Configuration
from selenium import webdriver


config_instance = Configuration()
export_dir = config_instance.EXPORT_DIR

options = webdriver.ChromeOptions()
prefs = {'download.default_directory' : str(export_dir)}
options.add_experimental_option('prefs', prefs)

# Create the Chrome driver and open Chrome
driver = webdriver.Chrome('C:/Users/blahova.m/Downloads/chromedriver_win32/chromedriver.exe', chrome_options=options)

# Open EMS web page
driver.get('http://www.emsbrno.cz/p.axd/en/Products.html')

# find login link to click at, and continue to login page
driver.find_element_by_link_text('Log in').click()

# select username text box element
username_box = driver.find_element_by_xpath('//*[@id="layout_ctl00_ctl00_content"]/table/tbody/tr[1]/td[2]/input')
username_box.send_keys(config_instance.USER_NAME)
# select password text box element
password_box = driver.find_element_by_xpath('//*[@id="layout_ctl00_ctl00_content"]/table/tbody/tr[2]/td[2]/input')
password_box.send_keys(config_instance.PASSWORD)
# select login button and click
driver.find_element_by_xpath('//*[@id="layout_ctl00_ctl00_content"]/table/tbody/tr[4]/td[2]/input').click()

for page_url in config_instance.LINKS_ARRAY:
    driver.get(page_url)
Example #25
def trainAndTestOneFold(trainData, testPrefixes, trainSubsetN, testPath, outputPath, verbose=False, ncpu=1):
  '''
    Trains and tests one fold

     :param trainData: a numpy array for training whose first column holds the labels and the others the features
     :param testPrefixes: str[]. A list that contains prefixes for all complexes to be tested
     :param trainSubsetN: int Tuple. The numerical ids of the training split.
     :param testPath: str. Path to a dir where testing data files are stored
     :param outputPath: str. Path to a dir where predictions will be stored. None if results are not to be saved
     :param verbose: boolean. Whether or not to print info to stdout
     :param ncpu: int. Number of cpus to use in parallel
  '''

  testPrefixesNotEvaluated = []
  originalTestPrefixToNewPrefix, __ = getOriginalToActualPrefixs(testPrefixes)
  alreadyComputedPrefixes_and_outnames= []
  for testPrefix in originalTestPrefixToNewPrefix:
    if outputPath is not None:
      outName = getResultsOutname(outputPath, testPrefix, trainSubsetN)
      if os.path.isfile(outName):  # reuse cached results even when not verbose
        if verbose:
          print("Complex already computed: %s" % (outName))
        alreadyComputedPrefixes_and_outnames.append((testPrefix, outName))
      else:
        testPrefixesNotEvaluated.append((testPrefix, outName))
    else:
      testPrefixesNotEvaluated.append((testPrefix, None))

  modelo = None

  from Config import Configuration
  conf = Configuration()
  modelFname = os.path.join(conf.tmp, hashlib.md5("".join(sorted(testPrefixes)).encode("utf-8")).hexdigest() +
                            str(trainSubsetN) + "bipspi2.pckl")  # .encode(): md5 needs bytes in Python 3

  resultsForEvaluation_list=[]
  if len(testPrefixesNotEvaluated) > 0 or len(testPrefixes) == 0:
    if verbose:
      print("Testing:", [ x[0] for x in testPrefixesNotEvaluated])
      verboseLevel = 1
    else:
      verboseLevel = 0

    if os.path.exists(modelFname):
      print("Loading classifier")
      modelo= joblib_load(modelFname)
    else:
      print("Training classifier")
      modelo = trainMethod(trainData[:, 1:], trainData[:, 0], verboseLevel=verboseLevel, ncpu=ncpu)
      joblib_save(modelo, modelFname)
    del trainData
    gc.collect()
    if verbose: print("Classifier fitted.")
    
    expectedSize= estimateRequiredMemoryPerComplex(testPrefixesNotEvaluated, testPath)
    freeMem= checkFreeMemory()
    nJobs= int(max(1, min(ncpu, freeMem/expectedSize, len(testPrefixesNotEvaluated))))
    print("Free memory for predictOnePrefix: %s GB. Njobs: %s (%s expected size)"%(freeMem, nJobs, expectedSize))

    resultsForEvaluation_list= Parallel(n_jobs=nJobs)(delayed(predictOnePrefix)(originalTestPrefixToNewPrefix[testPrefix],
                                                                      modelo, outName, testPath)
                                      for testPrefix, outName in testPrefixesNotEvaluated )
    gc.collect()

  expectedSize= estimateRequiredMemoryPerComplex(alreadyComputedPrefixes_and_outnames, testPath)
  freeMem= checkFreeMemory()
  nJobs= int(max(1, min(ncpu, freeMem/expectedSize, len(alreadyComputedPrefixes_and_outnames))))     
  resultsForEvaluation_list+= Parallel(n_jobs=nJobs)(delayed(loadExistingResults)( testPrefix, outName,)
                                    for testPrefix, outName in alreadyComputedPrefixes_and_outnames )
    
  if len(resultsForEvaluation_list)>0:
    freeMem = checkFreeMemory()
    totMem= getTotalMemory()
    usedMem= totMem-freeMem
    nJobs = int(max(1, min(ncpu, freeMem / (usedMem/(1+len(resultsForEvaluation_list))))))
    print("Free memory for evaluateOneResultObj: %s GB. Njobs: %s" % (freeMem, nJobs))
    Parallel(n_jobs=nJobs)(delayed(evaluateOneResultObj)(testPrefix, resultObj, False)
                           for testPrefix, resultObj in resultsForEvaluation_list)
    finalResults = list(zip(*resultsForEvaluation_list))[1]  # materialize zip() before indexing (Python 3)
  else:
    finalResults=[]
  del resultsForEvaluation_list
  tryToRemove(modelFname)
  return finalResults, modelo
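
A hedged usage sketch; the array shape and every path and prefix below are illustrative assumptions (column 0 of trainData holds the labels, the remaining columns the features):

import numpy as np

labels = np.random.randint(0, 2, size=(100, 1))
features = np.random.rand(100, 8)
trainData = np.hstack([labels, features])  # first column: labels
results, model = trainAndTestOneFold(trainData, ["1A2K", "1ACB"], (0,),
                                     testPath="computedFeatures/codified",
                                     outputPath="results/fold0",
                                     verbose=True, ncpu=2)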
Example #26
 def __init__(self):
     self.name = "DataFeed"
     self.Company = Configuration().GetData()['CompanyList']
     self.CompanyP = Configuration().GetData()['CompanyListP']
     self.APIKEYS = Configuration().GetData()['APIKEYDICT']
Example #27
from __future__ import print_function
import os
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, matthews_corrcoef
from collections import Counter
from Config import Configuration
pd.set_option('precision', 4)

EVAL_PAIRS_AT = [50, 100, 500]

PSAIA_PATH = os.path.join(Configuration().computedFeatsRootDir,
                          "structStep/PSAIA/procPSAIA")


def computeAUC(testLabels, predictions):
    '''
    Returns the ROC AUC, or 0.5 if it was not possible to compute it
    @param testLabels: int[]. List of labels (-1 for negative class and 1 for positive class)
    @param predictions: float[]. List of predicted scores
    @return auc_score: float
  '''
    try:
        return roc_auc_score(testLabels, predictions)
    except ValueError:
        return 0.5
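
# Quick sanity check of computeAUC's fallback behavior (illustrative values):
assert computeAUC([1, -1, 1, -1], [0.9, 0.2, 0.7, 0.3]) == 1.0  # positives score higher
assert computeAUC([1, 1, 1, 1], [0.9, 0.2, 0.7, 0.3]) == 0.5    # single-class input hits the fallback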


def evaluatePairs(prefix, resDf):
    '''
    Computes performance evaluation at pairs level.
Example #28
import sys, os
from multiprocessing import cpu_count
from Config import Configuration
from .FeaturesComputer import FeaturesComputer
from .common.computeContactMap import ContactMapper
from .common.seqInputPreproceser import SeqInputPreproceser

from .seqStep.getSeqFeatures import SeqFeaturesCalculator

from .structStep.PSAIA.computePSAIA import PSAIAComputer
from .structStep.VORONOI.computeVoronoi import VORONOIComputer
from .structStep.DSSP.computeDssp import DsspComputer
from .structStep.HALF_SPHERE_EXPOS.computeHalfSphere import HalfSphereComputer

#Default parameters
conf = Configuration()
pdbsIndirDefDefault = conf.pdbsIndir
computedFeatsRootDirDefault = conf.computedFeatsRootDir
useCorrMut = conf.useCorrMut

featuresComputers = {
    "mixed": [("ContactMapper", (ContactMapper, {
        "boundAvailable": True
    })), ("PSAIAComputer", (PSAIAComputer, {})),
              ("VORONOIComputer30", (VORONOIComputer, {
                  "maxDist": 30
              })), ("DsspComputer", (DsspComputer, {})),
              ("HalfSphereComputer", (HalfSphereComputer, {})),
              ("SeqFeaturesCalculator", (SeqFeaturesCalculator, {
                  "useCorrMut": useCorrMut
              }))],