def parameter_selection():
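  # Parameter selection for singleRSTA: launch one process per
  # (dataset, l_norm, C value, fold) combination, assigning processes to the
  # free cluster nodes round-robin.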
  cluster = get_free_nodes()[0]
  #cluster = ['dave']
  jobs=[]
  n=0
  is_main_run=0

  #filenames=['emotions','yeast','scene','enron','medical','toy10','toy50']#,'cal500','fp','cancer'] 
  filenames=['toy10','toy50','emotions','yeast','scene','enron','medical']
  n=0
  for filename in filenames:
    for graph_type in ['tree']:
      for l_norm in ['2']:
        in_i=0
        for in_c in ['100','75','50','20','10','5','1','0.5','0.25','0.1','0.01']:
          in_i+=1
          for kth_fold in ['1','2','3','4','5']:
            node=cluster[n%len(cluster)]
            n+=1
            p=multiprocessing.Process(target=singleRSTA, args=(filename,graph_type,'1',node,kth_fold,l_norm,"%d" % in_i,in_c,))
            jobs.append(p)
            p.start()
            time.sleep(2) # fold
            pass
        time.sleep(2) # c
        pass
      time.sleep(2*is_main_run) # lnorm
      pass
    time.sleep(60*is_main_run) # tree
    for job in jobs:
      job.join()
      pass
    pass
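# ---------------------------------------------------------------------------
# None of the snippets in this file define the helpers they all share
# (job_queue, Worker, get_free_nodes, the imports). The block below is a
# minimal sketch of that scaffolding so the snippets can be read in
# isolation. It is a reconstruction, not the original implementation: the
# imports are real Python 2 standard-library modules used by the snippets,
# but get_free_nodes(), the Worker class, and the ssh dispatch command are
# assumptions inferred from how the snippets use them.
# ---------------------------------------------------------------------------
import os
import re
import time
import random
import logging
import itertools
import threading
import multiprocessing
import commands  # Python 2 only
import Queue  # Python 2; the module is named "queue" on Python 3
from threading import ThreadError
import numpy as np

logging.basicConfig(level=logging.INFO)

job_queue = Queue.PriorityQueue()

def get_free_nodes():
  # Hypothetical: probe the cluster and return ([hostname, ...], [load, ...]).
  # The snippets only ever use element [0], the list of free hostnames.
  return (['localhost'], [0.0])

class Worker(threading.Thread):
  '''Drains (job_id, payload...) tuples from the queue and runs each job on
  the node this worker is bound to.'''
  def __init__(self, queue, node):
    threading.Thread.__init__(self)
    self.queue = queue
    self.node = node
  def run(self):
    while True:
      try:
        item = self.queue.get(block=False)
      except Queue.Empty:
        break
      # item is (priority, payload...); the payload shape varies per snippet.
      # Hypothetical dispatch: execute the job remotely via ssh.
      os.system("ssh %s 'run_job %s'" % (self.node,
                                         ' '.join(map(str, item[1:]))))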
def run():
  logging.info('\t\tGenerating priority queue.')
  # get list of files
  filelist = os.listdir('../../FeatureExtraction/Results/bids/')
  # generate job queue
  jobind = 0
  n=1
  nodefilelist = []
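  # batch the unprocessed files into jobs of five files each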
  for filename in filelist:
    if checkfile(filename):
        continue
    if n>5:
      job_queue.put((jobind,(nodefilelist)))
      jobind+=1
      n=1
      nodefilelist = []
    nodefilelist.append(filename)
    n+=1
  if len(nodefilelist) > 0:
    job_queue.put((jobind,(nodefilelist)))
  # get computing node
  cluster = get_free_nodes()[0]
  #cluster = ['melkinkari'] # if you don't have access to any computer cluster, just use your machine as the only computing node
  #cluster = ['ukko133.hpc'] # if you don't have access to any computer cluster, just use your machine as the only computing node
  # run jobs
  job_size = job_queue.qsize()
  logging.info( "\t\tProcessing %d job_queue" % (job_size))
  is_main_run_factor=1
  # running job_queue
  threads = []
  # The original snippet ends here without launching any workers; the loop
  # below completes it, following the launch pattern used by the other run()
  # variants in this file.
  workerload = 3
  for i in range(len(cluster)):
    for j in range(workerload):
      if job_queue.empty(): break
      t = Worker(job_queue, cluster[i])
      time.sleep(is_main_run_factor)
      try:
        t.start()
        threads.append(t)
      except ThreadError:
        logging.warning("\t\tError: thread error caught!")
    pass
  for t in threads:
    t.join()
    pass
  pass
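# `checkfile` is likewise not defined in these snippets. In the
# single-argument form used above it evidently returns True when the result
# for `filename` already exists, so finished work is skipped. A minimal
# sketch, with the results directory being a placeholder (other snippets
# below use multi-argument variants of the same idea):
def checkfile(filename):
  return os.path.exists('../../FeatureExtraction/Results/done/%s' % filename)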
def run():
        cluster = get_free_nodes()[0]
        jobs=[]
        n=0

        filenames=['memeS','memeM','memeL']
        filenames=[]
        for year in [2000]:
                for copaper in [10]:#,10,15]:
                        for v in [1000]:#,300,500,700,1000,2000]:
                                filenames.append('%d_%d_%d' % (year,copaper,v))
        for filename in filenames:
                c='100'
                g='0.8'
                for nb in ['1','3','5','7']:
                        for losstype in ['dif','exp']:
                                if losstype == 'exp':
                                        penaltyrange = ['0.3','0.5','0.7','1','3','5','7']
                                else:
                                        penaltyrange = ['0.1','0.5','0.8']
                                for penalty in penaltyrange:
                                        node = cluster[n%len(cluster)]
                                        n+=1
                                        p=multiprocessing.Process(target=singlerun, args=(filename,c,g,nb,penalty,losstype,node,))
                                        jobs.append(p)
                                        p.start()
                                        time.sleep(15)
                        time.sleep(60)
                time.sleep(300)
        for job in jobs:
                job.join()
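# The job targets handed to multiprocessing.Process in these snippets
# (singlerun, singleRSTA, singleMAMlearner, singleMAVAMMlearner) are not
# shown. A minimal sketch of such a per-job wrapper, assuming ssh-based
# dispatch to `node`; the script name run_single.py is hypothetical:
def singlerun(filename, c, g, nb, penalty, losstype, node):
  cmd = "ssh %s 'python run_single.py %s %s %s %s %s %s'" % (
      node, filename, c, g, nb, penalty, losstype)
  os.system(cmd)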
def run():
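    # Launch one singleMAVAMMlearner process per (dataset, graph type, t)
    # combination, spreading the processes over the free nodes round-robin.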
    cluster = get_free_nodes()[0]
    #cluster = ['dave']
    jobs = []
    n = 0
    is_main_run = 0.01

    filenames = [
        'emotions', 'yeast', 'scene', 'enron', 'cal500', 'fp', 'cancer',
        'medical', 'toy10', 'toy50', 'toy100'
    ]
    n = 0
    for filename in filenames:
        for graph_type in ['pair', 'tree']:
            for t in range(180):
                para_t = "%d" % (t + 1)
                node = cluster[n % len(cluster)]
                n += 1
                p = multiprocessing.Process(target=singleMAVAMMlearner,
                                            args=(
                                                filename,
                                                graph_type,
                                                para_t,
                                                node,
                                            ))
                jobs.append(p)
                p.start()
                time.sleep(1 * is_main_run)
            time.sleep(60 * is_main_run)

    for job in jobs:
        job.join()
    pass
def compute_graph_kernels_in_parallel():
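  # Queue one job per graph kernel whose result file is missing, then drain
  # the queue with one Worker thread per free cluster node.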
  cluster = get_free_nodes()[0]
  kernelnamelist = ['lRWkernel', 'WL', 'WLedge', 'WLspdelta', 'RGkernel1',
                    'l3graphletkernel', 'untilpRWkernel4', 'untilpRWkernel6',
                    'untilpRWkernel8', 'untilpRWkernel10', 'spkernel',
                    'SPkernel', 'RWkernel', 'gestkernel3', 'gestkernel4',
                    'connectedkernel3', 'connectedkernel4', 'connectedkernel5']
  # generate jobs and so on
  job_id = 0
  job_size = 1
  for kernelname in kernelnamelist:
    kernelresultfile = "../DTPNCI2015/results/ncicancer_kernel_graph_%s" % (kernelname)
    if os.path.exists(kernelresultfile):
      continue
    job_id = job_id + 1
    job_content = kernelname
    job_queue.put((job_id, job_content))
  logging.info("Processing jobs ...")
  # processing jobs
  job_size = job_queue.qsize()
  logging.info("In total %d jobs" % job_size)
  loadpernode = 1 
  threads = []
  counter = 0
  for node in cluster:
    for i in range(loadpernode):
      if counter >= job_size:
        break  # do not spawn more workers than there are jobs
      t = Worker(job_queue, node)
      time.sleep(0.5)
      counter = counter + 1
      try:
        t.start()
        threads.append(t)
      except ThreadError:
        logging.warning("\t\tThread error!")
  for t in threads:
    t.join()
  pass
def run():
  cluster = get_free_nodes()[0]
  #cluster = ['dave']
  jobs=[]
  n=0
  is_main_run=1

  filenames=['emotions','yeast','scene','enron','cal500','fp','cancer','medical','toy10','toy50','toy100'] 
  filenames=['fp']
  n=0
  for filename in filenames:
    for graph_type in ['pair']:
      for t in range(180):
        para_t="%d" % (t+1)
        node=cluster[n%len(cluster)]
        n+=1
        p=multiprocessing.Process(target=singleMAMlearner, args=(filename,graph_type,para_t,node,))
        jobs.append(p)
        p.start()
        time.sleep(1*is_main_run)
      time.sleep(30*is_main_run)
    time.sleep(600*is_main_run)

  for job in jobs:
    job.join()
  pass
def compute_fingerprints_in_parallel():
  # cluster is a list of nodes in an interactive cluster, e.g. ['ukko001.hpc','ukko002.hpc']
  cluster = get_free_nodes()[0]
  #cluster = ['melkinkari']
  logging.info("Read in list of molecules ...")
  moleculelist = []
  with open('../DTPNCI2015/results/ncicancer_labels') as fin:
    for line in fin:
      moleculelist.append(line.strip())
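  # fp_flag selects the fingerprint format: 1 -> .fp, 2 -> .fp3, 3 -> .fp4,
  # anything else -> MATLAB .mat files (see the output-filename branch below).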
  if fp_flag in (1, 2, 3):
    fppath = '../structures/FPfiles/'
  else:
    fppath = '../structures/MATLABfiles/'
  if not os.path.exists(fppath):
    os.makedirs(fppath)
  logging.info("Generate jobs ...")
  # generate jobs and so on
  job_id = 0
  job_size = 1
  for molecule in moleculelist:
    if fp_flag==1:
      sdffilename = "%s%s.fp" % (fppath,molecule)
    elif fp_flag==2:
      sdffilename = "%s%s.fp3" % (fppath,molecule)
    elif fp_flag==3:
      sdffilename = "%s%s.fp4" % (fppath,molecule)
    else:
      sdffilename = "%s%s.mat" % (fppath,molecule)
    if os.path.exists(sdffilename):
      continue
    job_id = job_id + 1
    job_content=molecule
    job_queue.put((job_id,job_content))
  logging.info("Processing jobs ...")
  # processing jobs
  job_size = job_queue.qsize()
  logging.info("In total %d jobs" % job_size)
  loadpernode = 8
  threads = []
  counter = 0
  for node in cluster:
    for i in range(loadpernode):
      if counter >= job_size:
        break  # do not spawn more workers than there are jobs
      t = Worker(job_queue, node)
      time.sleep(0.5)
      counter = counter + 1
      try:
        t.start()
        threads.append(t)
      except ThreadError:
        logging.warning("\t\tThread error!")
  for t in threads:
    t.join()
  pass
def run():
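    # Queue an RSTA job for every (fold, dataset, kappa, l_norm, |T|)
    # combination whose result log is missing, then start one Worker per
    # free cluster node.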
    cluster = get_free_nodes()[0]
    #cluster = ['dave']
    jobs = []
    n = 0
    is_main_run_factor = 5

    filenames = [
        'cancer', 'ArD20', 'ArD30', 'toy10', 'toy50', 'emotions', 'yeast',
        'medical', 'scene', 'enron', 'cal500', 'fp'
    ]
    n = 0
    # generate jobs
    for kth_fold in ['1']:  #,'2','3','4','5']:
        for filename in filenames:
            graph_type = 'tree'
            for kappa in [180,
                          280]:  #['2','4','8','16','20','32','40','50','60']:
                for l_norm in ['2']:
                    #for t in [5]:#range(0,41,10):
                    for t in [1, 5] + range(10, 41, 10):
                        if t == 0:
                            t = 1
                        para_t = "%d" % (t)
                        # skip the job if its result log already exists
                        try:
                            with open(
                                    "../outputs/%s_%s_%s_f%s_l%s_k%s_pfRSTAs.log"
                                    % (filename, graph_type, para_t, kth_fold,
                                       l_norm, kappa)):
                                pass
                            continue
                        except IOError:
                            n = n + 1
                            job_queue.put((n, filename, graph_type, para_t,
                                           kth_fold, l_norm, kappa))
                        pass  # for |T|
                    pass  # for l
                pass  # for kappa
            pass  # for datasets
        pass  # for k fold
    # running jobs
    job_size = job_queue.qsize()
    logging.info("\t\tprocessing %d jobs" % (job_size))
    threads = []
    for i in range(len(cluster)):
        if job_queue.empty():
            break
        t = Worker(job_queue, cluster[i])
        time.sleep(is_main_run_factor)
        try:
            t.start()
            threads.append(t)
        except ThreadError:
            logging.warning("\t\tError: thread error caught!")
        pass
    for t in threads:
        t.join()
        pass
    pass  # def
def run():
  '''
  A wrapper function that selects cluster nodes with good performance, defines the job space, and launches the jobs on the computing nodes of the cluster.
  '''
  logging.info('\t\tGenerating priority queue.')
  paramInd = 0
  kFold    = 5 
  numLabel = 3200  
  suffix   = 'val'
  isTest   = '0'
  # iterate over the lists
  xFilenameList         = ['../Data/tcdb.TB', '../Data/tcdb.TICoils', '../Data/tcdb.TIGene3D',
                           '../Data/tcdb.TIHamap', '../Data/tcdb.TIPANTHER', '../Data/tcdb.TIPfam',
                           '../Data/tcdb.TIPhobius', '../Data/tcdb.TIPIRSF', '../Data/tcdb.TIPRINTS',
                           '../Data/tcdb.TIProDom', '../Data/tcdb.TIProSitePatterns', '../Data/tcdb.TIProSiteProfiles',
                           '../Data/tcdb.TISignalP_EUK', '../Data/tcdb.TISignalP_GRAM_NEGATIVE', '../Data/tcdb.TISignalP_GRAM_POSITIVE',
                           '../Data/tcdb.TISMART', '../Data/tcdb.TISUPERFAMILY', '../Data/tcdb.TITIGRFAM',
                           '../Data/tcdb.TITMHMM', '../Data/tcdb.TPSI', '../Data/tcdb.TRPSCDD',
                           '../Data/tcdb.TRPSCDDNCBI', '../Data/tcdb.TRPSCOG', '../Data/tcdb.TRPSKOG',
                           '../Data/tcdb.TRPSPFAM', '../Data/tcdb.TRPSPRK', '../Data/tcdb.TRPSSMART',
                           '../Data/tcdb.TRPSTCDB201509PSSM', '../Data/tcdb.TRPSTIGR']
  #xFilenameList         = ['../Data/tcdb.TICoils']
  yFilenameList         = ['../Data/tcdb.TC']
  labelIndexList        = xrange(1,numLabel+1)
  foldIndexList         = xrange(1,kFold+1) 
  # generate job queue, will iterate over c,k,label
  for xFilename,yFilename,labelIndex,foldIndex in list(itertools.product(xFilenameList,yFilenameList,labelIndexList,foldIndexList)):
    for line in open('parameter_setting'):
      words = line.strip().split(' ')
      if words[0] == xFilename and words[1] == yFilename:
        svmC = words[2]
    tmpDir   = '../ResultsSVM/%s_%s/' % ( re.sub('.*/','',xFilename), re.sub('.*/','',yFilename))
    if not os.path.exists(tmpDir): os.mkdir(tmpDir)
    paramInd += 1
    outputFilename = tmpDir + '/' + re.sub('.*/','',xFilename) + '_' + re.sub('.*/','',yFilename) + '_l' + str(labelIndex) + '_f' + str(foldIndex) + '_c' +svmC + '_t' + isTest + '_' + suffix 
    ## check if result is ready already
    if checkfile(outputFilename): continue
    ## put parameter into queue
    job_queue.put( (paramInd,(str(paramInd),xFilename,yFilename,str(labelIndex),str(foldIndex),svmC,outputFilename,isTest)) )

  # get computing node
  logging.info('\t\tObtain cluster node')
  cluster = get_free_nodes()[0]
  #cluster = ['ukko133.hpc'] 

  # run jobs
  job_size = job_queue.qsize()
  logging.info( "\t\tProcessing %d job_queue" % (job_size))
  is_main_run_factor=1
  # running job_queue
  threads = []
  workerload = 3 
  for i in range(len(cluster)):
    for j in range(workerload):
      if job_queue.empty(): break
      t = Worker(job_queue, cluster[i])
      time.sleep(is_main_run_factor)
      try:
        t.start()
        threads.append(t)
      except ThreadError:
        logging.warning("\t\tError: thread error caught!")
    pass
  for t in threads:
    t.join()
    pass
  pass 
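# Note: the 'parameter_setting' file read in run() above is assumed to be a
# whitespace-separated table, one line per (xFilename, yFilename) pair, with
# the third column supplying svmC, e.g. (values illustrative only):
#   ../Data/tcdb.TB ../Data/tcdb.TC 10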
def run():
  is_main_run_factor=5
  #filenames=['toy10','toy50','emotions','medical','enron','yeast','scene','cal500','fp','cancer']
  #filenames=['cancer']
  filenames=['toy10','toy50','emotions','yeast','scene','enron','fp','medical']
  n=0
  # generate job_queue
  logging.info('\t\tGenerating priority queue.')
  for newton_method in ['1','0']:
    for filename in filenames:
      for slack_c in ['1', '10', '0.1']:
        for t in [1, 5, 10, 20, 30]:
          para_t="%d" % (t)
          graph_type = 'tree'
          for kappa in ['1','2','3','4','5','6','8','10','12','14','16']:
            for l_norm in ['2']:
              #for kth_fold in ['1','2','3','4','5']:
              for kth_fold in ['1']:
                for loss_scaling_factor in ['0.1','1']:
                  if checkfile(filename,graph_type,para_t,kth_fold,l_norm,kappa,slack_c,loss_scaling_factor,newton_method):
                    continue
                  else:
                    n=n+1
                    job_queue.put( (n, (filename,graph_type,para_t,kth_fold,l_norm,kappa,slack_c,loss_scaling_factor,newton_method)) )
                  pass # for loss_scaling_factor
                pass # for kth_fold
              pass # for l_norm
            pass # for kappa
          pass # for |T|
        pass # for slack_c
      pass # for datasets
    pass # for newton_method
  # get computing nodes
  cluster = get_free_nodes()[0] # if you have access to some interactive computer cluster, get the list of hostnames of the cluster
  #cluster = ['melkinkari'] # if you don't have access to any computer cluster, just use your machine as the only computing node
  # running job_queue
  job_size = job_queue.qsize()
  logging.info( "\t\tProcessing %d job_queue" % (job_size))
  threads = []
  for i in range(len(cluster)):
    if job_queue.empty():
      break
    t = Worker(job_queue, cluster[i])
    time.sleep(is_main_run_factor)
    try:
      t.start()
      threads.append(t)
    except ThreadError:
      logging.warning("\t\tError: thread error caught!")
    pass
  for t in threads:
    t.join()
    pass
  pass # def
def run():
  logging.info('\t\tGenerating priority queue.')
  paramInd = 0
  kFold    = 5 
  numLabel = 3200  
  suffix   = 'val'
  isTest   = '0'
  # iterate over the lists
  xFilenameList         = ['../Data/tcdb.all.KUNIMKL','../Data/tcdb.all.KALIGN','../Data/tcdb.all.KALIGNF', '../Data/tcdb.all.GUNIMKL','../Data/tcdb.all.GALIGN','../Data/tcdb.all.GALIGNF']
  yFilenameList         = ['../Data/tcdb.TC']
  labelIndexList        = xrange(1,numLabel+1)
  foldIndexList         = xrange(1,kFold+1) 
  # generate job queue, will iterate over c,k,label
  for xFilename,yFilename,labelIndex,foldIndex in list(itertools.product(xFilenameList,yFilenameList,labelIndexList,foldIndexList)):
    for line in open('parameter_setting'):
      words = line.strip().split(' ')
      if words[0] == xFilename and words[1] == yFilename:
        svmC = words[2]
    tmpDir   = '../ResultsMKL/%s_%s/' % ( re.sub('.*/','',xFilename), re.sub('.*/','',yFilename))
    if not os.path.exists(tmpDir): os.mkdir(tmpDir)
    paramInd += 1
    outputFilename = tmpDir + '/' + re.sub('.*/','',xFilename) + '_' + re.sub('.*/','',yFilename) + '_l' + str(labelIndex) + '_f' + str(foldIndex) + '_c' +svmC + '_t' + isTest + '_' + suffix 
    ## check if result is ready already
    if checkfile(outputFilename): continue
    ## put parameter into queue
    job_queue.put( (paramInd,(str(paramInd),xFilename,yFilename,str(labelIndex),str(foldIndex),svmC,outputFilename,isTest)) )

  # get computing node
  logging.info('\t\tObtain cluster node')
  cluster = get_free_nodes()[0]
  #cluster = ['ukko133.hpc'] 

  # run jobs
  job_size = job_queue.qsize()
  logging.info( "\t\tProcessing %d job_queue" % (job_size))
  is_main_run_factor=1
  # running job_queue
  threads = []
  workerload = 3 
  for i in range(len(cluster)):
    for j in range(workerload):
      if job_queue.empty(): break
      t = Worker(job_queue, cluster[i])
      time.sleep(is_main_run_factor)
      try:
        t.start()
        threads.append(t)
      except ThreadError:
        logging.warning("\t\tError: thread error caught!")
    pass
  for t in threads:
    t.join()
    pass
  pass 
def run():
  jobs=[]
  n=0
  is_main_run_factor=5
  filenames=['cancer','ArD20','ArD30','toy10','toy50','emotions','yeast','medical','scene','enron','cal500','fp']
  #filenames=['scene']
  n=0
  # generate jobs
  logging.info('\t\tGenerating job queue.')
  for slack_c in ['100','1','0.1','10','0.01','50','0.5','20','0.05','5']:
    for kth_fold in ['1','2','3','4','5']:
      for filename in filenames:
        graph_type = 'tree'
        for kappa in ['2','8','16','20']:
          for l_norm in ['2']:
            for t in range(0,41,10):
              if t==0:
                t=1
              para_t="%d" % (t)
              if checkfile(filename,graph_type,para_t,kth_fold,l_norm,kappa,slack_c):
                continue
              else:
                n=n+1
                job_queue.put((n,filename,graph_type,para_t,kth_fold,l_norm,kappa,slack_c))
              pass # for |T|
            pass # for l
          pass # for kappa
        pass # for datasets
      pass # for k fold
    pass # for slack_c
  # get computing nodes
  cluster = get_free_nodes()[0]
  # running jobs
  job_size = job_queue.qsize()
  logging.info( "\t\tProcessing %d jobs" % (job_size))
  threads = []
  for i in range(len(cluster)):
    if job_queue.empty():
      break
    t = Worker(job_queue, cluster[i])
    time.sleep(is_main_run_factor)
    try:
      t.start()
      threads.append(t)
    except ThreadError:
      logging.warning("\t\tError: thread error caught!")
    pass
  for t in threads:
    t.join()
    pass
  pass # def
def run():
  logging.info('\t\tGenerating priority queue.')
  paramind = 0
  K = 10 
  for c in ['0.01','0.05','0.1','0.5','1','5','10','50','100','1000']:
    for g in ['0.0001','0.001','0.005','0.01','0.05','0.1','0.5','1','5','10','50','100']:
      for k in range(1,(K+1)):
        paramind += 1
        outfilename = '../../Learning/Results/ParameterSelection/%d' % paramind
        if checkfile(outfilename):
          continue
        job_queue.put((paramind,(str(paramind),str(K),str(k),c,g,outfilename)))
  # get computing node
  cluster = get_free_nodes()[0]
  #cluster = ['ukko133.hpc'] 
  # run jobs
  job_size = job_queue.qsize()
  logging.info( "\t\tProcessing %d job_queue" % (job_size))
  is_main_run_factor=1
  # running job_queue
  threads = []
  workerload = 10 
  for i in range(len(cluster)):
    for j in range(workerload):
      if job_queue.empty():
        break
      t = Worker(job_queue, cluster[i])
      time.sleep(is_main_run_factor)
      try:
        t.start()
        threads.append(t)
      except ThreadError:
        logging.warning("\t\tError: thread error caught!")
    pass
  for t in threads:
    t.join()
    pass
  pass # def
def simulationAndRealignment():
	'''
	#simulation
	print "start simulation"
	os.system("echo 'start simulation' > %s"%(logFile))
	block_size = cpu_num*max_thread
	for block_id in range(block):
		if (block_id + 1) * block_size > seq_num:
			block_size = seq_num - block_id * block_size 
		os.system("./pair_hmm.py len=%s a=%s e=%s g=%s l=%s rep=%d tree='%s' id=%d in=%s simul=1>> %s"%(options.length, options.indel, options.extension, options.gamma, options.Lambda, block_size, options.tree, block_id, options.ifname, logFile))
	'''
	#realignment
	#get job queue
	t_range = 12 
	print "get the job queue"
	job_id = 0
	for i in range(t_range):
		for j in range(block):
			job_queue.put((job_id, 0.05 + 0.05 * i, j))
			job_id += 1
	#processing
	job_size = job_queue.qsize()
        print "processing %d jobs" % (job_size)
	start = time.time()
	os.system("echo 'processing %d jobs' >> %s"%(job_size, logFile))
	threads = []
	cluster = get_free_nodes()[0]
	#cluster=['ukko003.hpc','ukko004.hpc','ukko005.hpc','ukko006.hpc','ukko007.hpc','ukko008.hpc']
	load = 1
	for i in range(load):
		if job_queue.empty():
			break
		nodes_num = 0
		for j in range(max_node):
			if job_queue.empty():
				break
			os.system("echo '---->%d  %s'>> %s"%(j, cluster[j%len(cluster)], logFile)) 
			t = Worker(job_queue, cluster[j%len(cluster)])
			nodes_num += 1
			time.sleep(1)
			try:
				t.start()
				threads.append(t)
			except ThreadError:
				os.system("echo '\t\tError: thread error caught!'>>%s"%(logFile))
		time.sleep(random.randint(5000,6000)/1000.0)
		
		os.system("echo 'using %d nodes in cluster' >>%s"%(nodes_num,  logFile))
	for t in threads:
		t.join()

	#combine results
	os.system("echo 'combining results' >>%s"%(logFile))
	accuracy = []
	score = []
	for i in range(t_range):
		acc = []
		lScore = []
		for j in range(block):
			jobFName = "%s/result_for_%s_%d_with_t%.2f"%(dataDir, os.path.splitext(os.path.basename(options.ifname))[0], j, 0.05+0.05*i)
			s = commands.getoutput("tail -1 %s |tr -d '(|)|'|tr ',' ' '"%(jobFName))
			s = s.split()
			acc.append(float(s[0]))
			lScore.append(float(s[1]))
			#os.system("rm "+jobFName)
		accuracy.append(np.array(acc).mean())
		score.append(np.array(lScore).mean())
	os.system("echo %s > %s/finalResult"%(' '.join([str(i) for i in accuracy]), dataDir))
	os.system("echo %s > %s/finalScore"%(' '.join([str(i) for i in score]), dataDir))
	os.system("echo '%.4gs' >> %s"%(time.time()-start, logFile))
def run():
  '''
  A wrapper function that selects cluster nodes with good performance, defines the job space, and launches the jobs on the computing nodes of the cluster.
  '''
  logging.info('\t\tGenerating priority queue.')
  paramInd = 0
  kFold    = 5 
  suffix   = 'sel'
  isTest   = '0'
  # iterate over the lists
  xFilenameList         = ['../Data/tcdb.all.KUNIMKL','../Data/tcdb.all.KALIGN','../Data/tcdb.all.KALIGNF']
  xFilenameList         = ['../Data/tcdb.all.GUNIMKL','../Data/tcdb.all.GALIGN','../Data/tcdb.all.GALIGNF']
  foldIndexList         = xrange(1,kFold+1) 
  cList                 = ['1000','5000','10000','50000']
  stepSize1List         = ['7','9','11']
  stepSize2List         = ['7','9','11']
  cList.reverse()
  stepSize1List.reverse()
  stepSize2List.reverse()

  yFilename   = '../Data/tcdb.TC'
  EFilename   = '../Data/tcdb.TC.E'
  SFilename   = '../Data/tcdb.TC.SrcSpc'

  for xFilename,foldIndex,sopC,stepSize1,stepSize2 in list(itertools.product(xFilenameList,foldIndexList,cList,stepSize1List,stepSize2List)):
    tmpDir   = '../ResultsSOP/tmp_%s_%s/' % ( re.sub('.*/','',xFilename), re.sub('.*/','',yFilename))
    if not os.path.exists(tmpDir): os.mkdir(tmpDir)
    paramInd += 1
    outputFilename = tmpDir + '/' + re.sub('.*/','',xFilename) + '_' + re.sub('.*/','',yFilename) + '_f' + str(foldIndex) + '_c' +sopC + '_s1' + stepSize1 + '_s2' + stepSize2 + '_t' + isTest + '_' + suffix + '.mat'
    logFilename = tmpDir + '/' + re.sub('.*/','',xFilename) + '_' + re.sub('.*/','',yFilename) + '_f' + str(foldIndex) + '_c' +sopC + '_s1' + stepSize1 + '_s2' + stepSize2 + '_t' + isTest + '_' + suffix + '.log'
    ## check if result is ready already
    if checkfile(outputFilename): continue
    ## put parameter into queue
    job_queue.put( (paramInd,(str(paramInd),xFilename,yFilename,EFilename,SFilename,str(foldIndex),sopC,outputFilename,logFilename,stepSize1,stepSize2,isTest,suffix)) )

  # get computing node
  logging.info('\t\tObtain cluster node')
  cluster = get_free_nodes()[0]
  #cluster = ['ukko133'] 

  # run jobs
  job_size = job_queue.qsize()
  logging.info( "\t\tProcessing %d job_queue" % (job_size))
  is_main_run_factor=1
  # running job_queue
  threads = []
  workerload = 1 
  for i in range(len(cluster)):
    for j in range(workerload):
      if job_queue.empty(): break
      t = Worker(job_queue, cluster[i])
      time.sleep(is_main_run_factor)
      try:
        t.start()
        threads.append(t)
      except ThreadError:
        logging.warning("\t\tError: thread error caught!")
    pass
  for t in threads:
    t.join()
    pass
  pass