コード例 #1
0
ファイル: jobscreator.py プロジェクト: kishori82/multi-seq
    def create_ref_genome_index_cmd(self, s):
        """Build the job context for the CREATE_REF_GENOME_INDEX step.

        Args:
            s: sample description; ``ref_genome_sequences``, ``refindex_dir``
                and ``refindex_name`` are read from it.

        Returns:
            A one-element list holding the ``Context`` when the configured
            aligner is 'bowtie2', otherwise an empty list.
        """
        step = 'CREATE_REF_GENOME_INDEX'
        job = Context()
        job.name = step
        # input
        job.inputs = {'refseq_genome_sequences': s.ref_genome_sequences}
        # params
        aligner = self.params.get('ALIGNMENT', 'aligner')
        job.status = self.params.get('multiseq_steps', step)

        if aligner != 'bowtie2':
            # Only bowtie2 is handled here; nothing is scheduled otherwise.
            return []

        # outputs
        index_prefix = s.refindex_dir + pathDelim() + s.refindex_name
        job.outputs = {'index': index_prefix + ".1.bt2"}

        builder = self.configs.BOWTIE2BUILD
        build_cmd = "%s %s %s" % (builder, s.ref_genome_sequences,
                                  index_prefix)
        job.message = self._Message("CREATE_REF_GENOME_INDEX : bowtie2\n")
        job.commands = [build_cmd]
        return [job]
コード例 #2
0
ファイル: jobscreator.py プロジェクト: kishori82/multi-seq
    def align_reads_cmd(self, s):
        """Build the job context for the ALIGN_READS step.

        Args:
            s: sample description; ``input_files``, ``refindex_dir``,
                ``refindex_name``, ``alignment_dir`` and ``sample_name`` are
                read from it.

        Returns:
            A one-element list holding the ``Context`` when the configured
            aligner is 'bowtie2', otherwise an empty list.
        """

        def in_alignment_dir(suffix):
            # <alignment_dir><delim><sample_name><suffix>
            return s.alignment_dir + pathDelim() + s.sample_name + suffix

        job = Context()
        job.name = 'ALIGN_READS'
        # inputs
        job.inputs = {
            'reads': s.input_files[1],
            'index': s.refindex_dir + pathDelim() + s.refindex_name + ".1.bt2",
        }
        # params
        aligner = self.params.get('ALIGNMENT', 'aligner')
        job.status = self.params.get('multiseq_steps', 'ALIGN_READS')
        q30filter = self.params.get('QC', 'q30filter')

        if aligner != 'bowtie2':
            return []

        # outputs
        job.outputs = {
            "samfile": in_alignment_dir('.sam'),
            "bamfile": in_alignment_dir('.bam'),
            "samfile_sample_down": in_alignment_dir('.sample_down.sam'),
            "bedfile": in_alignment_dir('.bed'),
            "bedfile_sample_down": in_alignment_dir('.sample_down.bed'),
        }

        pyscript = self.configs.ALIGN_READS
        index_prefix = s.refindex_dir + pathDelim() + s.refindex_name
        pieces = ["%s -p %s -x %s -U %s -S %s" % (
            pyscript, self.configs.NUM_PROCS, index_prefix,
            s.input_files[1], job.outputs['samfile'])]

        # The filter flag is appended only when the QC parameter is 'on'.
        if q30filter == 'on':
            pieces.append(" --q30filter")

        job.message = self._Message("ALIGN_READS : bowtie2\n")
        job.commands = ["".join(pieces)]
        return [job]
コード例 #3
0
ファイル: jobscreator.py プロジェクト: kishori82/multi-seq
    def create_figures_cmd(self, s):
        '''Build the job context for the CREATE_FIGURES step.

        The single command runs ``R CMD BATCH`` on the configured R script,
        passing the expression matrix and the plot file as ``--args``.

        Returns:
            A one-element list holding the ``Context``.
        '''
        r_script = (self.configs.MULTISEQ_PATH + PATHDELIM +
                    self.configs.CREATE_FIGURES)

        job = Context()
        job.name = "CREATE FIGURES"
        job.message = self._Message("CREATE_FIGURES\n")
        job.status = self.params.get('multiseq_steps', 'CREATE_FIGURES')

        expmatrix = (s.expression_matrix_dir + pathDelim() + s.sample_name +
                     ".expmatrix.txt")
        job.inputs["expmatrix"] = expmatrix
        rank_plot = s.expression_matrix_dir + PATHDELIM + 'barcode_rank_plot.pdf'
        job.outputs['barcode_rank_plot'] = rank_plot

        # The opening quote of the --args group is closed right after the
        # two file names.
        quoted_args = "R CMD BATCH --no-save \'--args %s %s" % (
            job.inputs['expmatrix'], job.outputs['barcode_rank_plot'])
        job.commands = [quoted_args + '\' ' + r_script + '  /dev/null']

        return [job]
    import optparse, sys, re, csv, traceback
    from os import path
    import logging.handlers

    from libs.python_modules.utils.sysutil import pathDelim
    from libs.python_modules.utils.metapathways_utils import fprintf, printf, eprintf, exit_process
    from libs.python_modules.utils.sysutil import getstatusoutput
    from libs.python_modules.utils.errorcodes import error_message, get_error_list, insert_error
except:
    print """ Could not load some user defined  module functions"""
    print """ Make sure your typed 'source MetaPathwaysrc'"""
    print """ """
    print traceback.print_exc(10)
    sys.exit(3)

# Path delimiter returned by pathDelim(), evaluated once at import time.
PATHDELIM = pathDelim()


def fprintf(file, fmt, *args):
    """Write ``fmt % args`` to ``file`` (any object with a ``write`` method)."""
    rendered = fmt % args
    file.write(rendered)


def printf(fmt, *args):
    """Write ``fmt % args`` to standard output without adding a newline."""
    rendered = fmt % args
    sys.stdout.write(rendered)


def files_exist(files):
    for file in files:
        if not path.exists(file):
            print 'Could not read File ' + file
            print 'Please make sure these sequences are in the \"blastDB\" folder'
コード例 #5
0
   from optparse import OptionParser, OptionGroup

   from libs.python_modules.utils.sysutil import pathDelim
   from libs.python_modules.utils.metapathways_utils  import fprintf, printf, eprintf,  exit_process
   from libs.python_modules.utils.sysutil import getstatusoutput

   from libs.python_modules.utils.pathwaytoolsutils import *

except:
     print """ Could not load some user defined  module functions"""
     print """ Make sure your typed 'source MetaPathwaysrc'"""
     print """ """
     print traceback.print_exc(10)
     sys.exit(3)

# Path delimiter returned by pathDelim(), evaluated once at import time.
PATHDELIM=pathDelim()



def fprintf(file, fmt, *args):
    """Format ``fmt`` with ``args`` (printf-style) and write it to ``file``."""
    text = fmt % args
    file.write(text)

def printf(fmt, *args):
    """Format ``fmt`` with ``args`` (printf-style) and write it to stdout."""
    text = fmt % args
    sys.stdout.write(text)

def files_exist( files , errorlogger = None):
    status = True    
    for file in files:
       if not path.exists(file):
          if errorlogger:
             errorlogger.write( 'ERROR\tCould not find ptools input  file : ' +  file )
    from libs.python_modules.utils.sysutil import getstatusoutput
    from libs.python_modules.utils.utils import doesFileExist, createDummyFile
    from libs.python_modules.utils.errorcodes import *
    from libs.python_modules.utils.sysutil import pathDelim
    from libs.python_modules.parsers.blast import BlastOutputTsvParser, getParsedBlastFileNames, getrRNAStatFileNames
except:
    print """ Could not load some user defined  module functions"""
    print """ Make sure your typed 'source MetaPathwaysrc'"""
    print """ """
    sys.exit(3)

# Usage text handed to the OptionParser in createParser(); it is prefixed
# with the script name taken from sys.argv[0].
usage = sys.argv[
    0] + """ dbname2 -b parsed_blastout_for_database2 -w weight_for_database2 ] [ --rRNA_16S  16SrRNA-stats-table ] [ --tRNA tRNA-stats-table ] [ --compact_output ]"""
# Module-level option parser; assigned by createParser().
parser = None

# Path delimiter returned by pathDelim(), coerced to str.
PATHDELIM = str(pathDelim())


def createParser():
    """Create the module-level ``parser`` and register its options.

    NOTE(review): only the ``-a/--algorithm`` option is visible here; the
    function may register further options beyond this excerpt -- confirm.
    """
    global parser
    epilog = """Reads the parsed BLAST/LAST files to create files that provides gene count
               and then convert to biom format """

    # Collapse every whitespace run (including the line break and the
    # indentation inside the literal above) to a single space.
    epilog = re.sub(r'\s+', ' ', epilog)
    parser = OptionParser(usage=usage, epilog=epilog)

    parser.add_option("-a",
                      "--algorithm",
                      dest="algorithm",
                      default="BLAST",
                      help="algorithm BLAST or LAST [default BLAST]")
コード例 #7
0
ファイル: MetaPathways.py プロジェクト: nielshanson/myapp
     from libs.python_modules.grid.blast_using_grid import blast_in_grid

     from libs.python_modules.diagnostics.parameters import *
     from libs.python_modules.diagnostics.diagnoze import *
     from libs.python_modules.pipeline.sampledata import *
except:
   print """ Could not load some user defined  module functions"""
   print """ Make sure your typed \"source MetaPathwaysrc\""""
   print """ """
   #print traceback.print_exc(10)
   sys.exit(3)


# Absolute path of the directory that contains this source file.
cmd_folder = path.abspath(path.split(inspect.getfile( inspect.currentframe() ))[0])

# Path delimiter returned by pathDelim(), coerced to str.
PATHDELIM =  str(pathDelim())

#print cmd_folder
#if not sys.platform.startswith('win'):
#    res =getstatusoutput('source  '+ cmd_folder +'/'+'.metapathsrc')
#    if( int(res[0])==0 ): 
#       print 'Ran ' + cmd_folder +'/'+'.metapathsrc ' + ' file successfully!'
#    else:
#       print 'Error : ' + res[1] 
#       print 'while running  ' + cmd_folder +'/'+'.metapathsrc ' + ' file!'

#sys.path.insert(0,cmd_folder + "/libs/python_modules/")
#sys.path.insert(1, cmd_folder + "/libs/")
#print sys.path

#config = load_config()
コード例 #8
0
def main(argv, errorlogger=None, runcommand=None, runstatslogger=None):
    """Intersect the sample's reads with gene annotations and build matrices.

    Steps, in order:
      1. ``_execute_bedtools_intersect`` is called once per annotation region
         (symbol, cds, 3utr, 5utr, TTSdis) with
         ``<alignment_dir>/<sample>.bed`` and
         ``<preprocess_dir>/<sample>.gene_anno_<region>.bed``, writing
         ``<expression_mat_dir>/<sample>.gene_on_<region>.bed``.
      2. ``combine_reads`` merges the five results into ``.combined.bed``.
      3. The combined file is sorted (``sort -k 7,7 -k 5,5``).
      4. ``generate_matrix`` writes the qcmatrix, qcmatrix_full and
         expmatrix files.

    Args:
        argv: argument list parsed with the module-level ``parser``.
        errorlogger: optional logger; its ``printf`` receives the error text.
        runcommand: not used by this function.
        runstatslogger: not used by this function.

    Returns:
        0 when every intersect call reported status 0, otherwise the first
        non-zero status. All later steps still run in either case.

    The process exits with status 3 if ``combine_reads`` or
    ``generate_matrix`` raises.
    """
    global parser

    options, args = parser.parse_args(argv)

    sample = options.sample_name
    reads_bed = options.alignment_dir + PATHDELIM + sample + ".bed"

    # One intersect per annotation region: (region tag, extra arguments).
    regions = [
        ("symbol", "-wo | sort -k 4,4 - "),
        ("cds", "-c | sort -k 4,4 -  "),
        ("3utr", "-c  | sort -k 4,4 -"),
        ("5utr", "-c | sort -k 4,4 - "),
        ("TTSdis", "-c | sort -k 4,4 -"),
    ]

    gene_on = {}
    failed_code = 0
    for region, extra in regions:
        gene_on[region] = (options.expression_mat_dir + PATHDELIM + sample +
                           ".gene_on_" + region + ".bed")
        (code, message) = _execute_bedtools_intersect(
            reads_bed,
            options.preprocess_dir + PATHDELIM + sample +
            ".gene_anno_" + region + ".bed",
            gene_on[region],
            additional_params=extra)
        # Keep the first failure. Each call used to overwrite `code`, so
        # only a failure of the last intersect was ever reported.
        if code != 0 and failed_code == 0:
            failed_code = code

    combined_hits_bed = (options.expression_mat_dir + pathDelim() + sample +
                         ".combined.bed")
    try:
        combine_reads(options.extracted_barcodes, gene_on["cds"],
                      gene_on["3utr"], gene_on["5utr"], gene_on["symbol"],
                      gene_on["TTSdis"], combined_hits_bed, 2)
    except Exception:
        # print_exc() writes the traceback itself and returns None, so its
        # result must not be printed.
        traceback.print_exc(10)
        sys.exit(3)

    combined_hits_bed_sorted = (options.expression_mat_dir + pathDelim() +
                                sample + ".combined.sorted.bed")
    cmd = "sort -k 7,7 -k 5,5 %s > %s" % (combined_hits_bed,
                                          combined_hits_bed_sorted)
    getstatusoutput(cmd)

    qcmatrix = (options.expression_mat_dir + pathDelim() + sample +
                ".qcmatrix.txt")
    expmatrix = (options.expression_mat_dir + pathDelim() + sample +
                 ".expmatrix.txt")
    qcmatrix_full = (options.expression_mat_dir + pathDelim() + sample +
                     ".qcmatrix_full.txt")

    try:
        generate_matrix(options.gene_annotation_file, combined_hits_bed_sorted,
                        True, qcmatrix_full, qcmatrix, expmatrix, 2, True)
    except Exception:
        traceback.print_exc(10)
        sys.exit(3)

    if failed_code != 0:
        outputStr = '\nERROR\tCannot successfully execute\n'

        eprintf(outputStr + "\n")

        if errorlogger:
            errorlogger.printf(outputStr + "\n")
        return failed_code

    return 0
コード例 #9
0
ファイル: jobscreator.py プロジェクト: kishori82/multi-seq
    def create_expression_matrix_cmd(self, s):
        """Build the job context for the CREATE_EXPRESSION_MATRIX step.

        Args:
            s: sample description; ``preprocessed_dir``, ``alignment_dir``,
                ``expression_matrix_dir``, ``sample_name`` and
                ``ref_gene_annotations`` are read from it.

        Returns:
            A one-element list holding the ``Context``.
        """

        def preprocessed(suffix):
            return s.preprocessed_dir + PATHDELIM + s.sample_name + suffix

        def matrix_file(suffix, delim):
            return s.expression_matrix_dir + delim + s.sample_name + suffix

        job = Context()
        job.name = 'CREATE_EXPRESSION_MATRIX'
        # inputs
        job.inputs = {
            "gene_anno_3utr": preprocessed(".gene_anno_3utr.bed"),
            "gene_anno_5utr": preprocessed(".gene_anno_5utr.bed"),
            "gene_anno_cds": preprocessed(".gene_anno_cds.bed"),
            "gene_anno_TTSdis": preprocessed(".gene_anno_TTSdis.bed"),
            "gene_anno_symbol": preprocessed(".gene_anno_symbol.bed"),
            "barcodes_sorted": preprocessed(".barcodes.sorted.txt"),
        }
        # params
        job.status = self.params.get('multiseq_steps',
                                     'CREATE_EXPRESSION_MATRIX')
        # outputs
        job.outputs = {
            "gene_on_3utr": matrix_file(".gene_on_3utr.bed", PATHDELIM),
            "gene_on_5utr": matrix_file(".gene_on_5utr.bed", PATHDELIM),
            "gene_on_cds": matrix_file(".gene_on_cds.bed", PATHDELIM),
            "gene_on_TTSdis": matrix_file(".gene_on_TTSdis.bed", PATHDELIM),
            "gene_on_symbol": matrix_file(".gene_on_symbol.bed", PATHDELIM),
            'combined_hits_bed': matrix_file(".combined.bed", pathDelim()),
            "qcmatrix": matrix_file(".qcmatrix.txt", pathDelim()),
            "expmatrix": matrix_file(".expmatrix.txt", pathDelim()),
            "qcmatrix_full": matrix_file(".qcmatrix_full.txt", pathDelim()),
        }

        pyscript = self.configs.CREATE_EXPRESSION_MATRIX
        template = ("%s -s %s -b %s --preprocess_dir %s --alignment_dir %s "
                    "--expression_mat_dir %s -g %s")
        command = template % (pyscript, s.sample_name,
                              job.inputs['barcodes_sorted'],
                              s.preprocessed_dir, s.alignment_dir,
                              s.expression_matrix_dir, s.ref_gene_annotations)

        job.message = self._Message("CREATE_EXPRESSION_MATRIX\n")
        job.commands = [command]
        return [job]
コード例 #10
0
class BlastService:
     """Builds ssh/scp command lines and runs them as subprocesses to drive a remote ``daemon.py``."""
     # Remote-shell and copy programs; both are reassigned further down in
     # the class body depending on PATHDELIM.
     SSH = 'ssh'
     SCP = ''
     # Path delimiter returned by pathDelim().
     PATHDELIM = pathDelim()
     # Extra arguments (-i <win_rsa.ppk path>, -batch) that the methods add
     # to a command line when PATHDELIM is a backslash.
     WIN_RSA_KEY_ARGS = ["-i", 'executables' + PATHDELIM + 'win' + PATHDELIM + 'bit64' + PATHDELIM + 'win_rsa.ppk', '-batch']
     

     # Values passed as --submit-string / --submission-type by __remote_Submit.
     sub_string = ""
     submission_type = '0'


     def buildSSHLogin(self, connType='ssh'):
         """Return the leading argument list for an ssh or scp invocation.

         The list starts with ``connType``, gains ``-i <keyfile>`` when the
         instance has a ``keyfile`` attribute, and ends with ``user@server``
         only when ``connType`` is 'ssh'.
         """
         user, server = self.getUserServer()
         key_part = ['-i', self.keyfile] if hasattr(self, 'keyfile') else []
         host_part = [user + '@' + server] if connType == 'ssh' else []
         return [connType] + key_part + host_part


     def  isValid(self, opts):
         """Return True when ``opts`` has sample_name, user and server attributes and a truthy sample_name."""
         required = ('sample_name', 'user', 'server')
         if not all(hasattr(opts, attr) for attr in required):
             return False
         return bool(opts.sample_name)
     
     
     def getUserServer(self):
         """Return the ``(user, server)`` pair stored on this instance."""
         login = (self.user, self.server)
         return login


     def create_a_process(self, args):
         """Start ``args`` as a subprocess with stdin, stdout and stderr piped.

         The ``self`` parameter was missing, so this definition could not be
         called as a method; it now matches the later definition of the same
         name in this class (which is the one that takes effect).
         """
         p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         return p

     def _submit_remote_location(self, basicargs):
         """Run ``python <basicargs...>`` on the remote host through ssh.

         The body uses ``self`` and ``create_a_process``, so the method needs
         the ``self`` parameter it was declared without.

         Args:
             basicargs: arguments appended after ``python``.

         Returns:
             The ``(stdout, stderr)`` pair from ``communicate()``.
         """
         args = self.buildSSHLogin()
         args += ['python']
         args.extend(basicargs)

         # Echo the full command line before running it.
         print(' '.join(args))

         p = self.create_a_process(args)
         return p.communicate()

     def _interpret_results(self, _result, expect):
         """Join the pieces of ``_result`` with spaces and look for ``expect``.

         Returns:
             ``(found, joined_text)`` where ``found`` tells whether ``expect``
             occurs in the joined text.
         """
         joined = ' '.join(_result)
         found = expect in joined
         return (found, joined)
     
     
     def _parse_array_results(self, result, delim = ','):
         """Split ``result[0]`` on ``delim`` and return the non-blank, stripped items."""
         stripped = (item.strip() for item in result[0].split(delim))
         return [item for item in stripped if item]

    
     def  __remote_check_if_server_is_up(self, user, server):
         """Run ``echo hello`` over ssh and report whether 'hello' came back.

         ``user`` and ``server`` are accepted but not used; the login comes
         from ``buildSSHLogin()``.

         Returns:
             True when the combined remote output contains 'hello'.
         """
         args = self.buildSSHLogin()

         # PATHDELIM and WIN_RSA_KEY_ARGS are class attributes; a method can
         # only reach them through self (or the class), not as bare names.
         if self.PATHDELIM == '\\':
             args = args + self.WIN_RSA_KEY_ARGS

         args += ['echo', 'hello']
         p = self.create_a_process(args)
         result = p.communicate()
         (boolean, message) = self._interpret_results(result, 'hello')
         return boolean
     
     def  __remote_Remove_Sample_Folders(self, samples,  home_dir ='~', working_dir = '~'):
         """Ask the remote daemon.py to remove the folders of ``samples``.

         One ``--remove-sample-dirs <working_dir>/MetaPathways/samples/<s>``
         pair is passed per sample.

         Returns:
             True when the combined remote output contains '<<Success!>>'.
         """
         args = self.buildSSHLogin()
         fullFolderNames = []
         for s in samples:
             fullFolderNames.extend(
                 ['--remove-sample-dirs',
                  working_dir + '/MetaPathways/samples/' + s])

         # Class attributes must be reached through self inside a method.
         if self.PATHDELIM == '\\':
             args = args + self.WIN_RSA_KEY_ARGS
         args = args + ['python', 'daemon.py', '--home-dir', home_dir] + fullFolderNames
         p = self.create_a_process(args)
         result = p.communicate()
         (boolean, message) = self._interpret_results(result, '<<Success!>>')
         return boolean

     def __local_No_of_Lines(self, filename):
         """Return whatever ``number_of_lines_in_file`` reports for ``filename``."""
         line_count = number_of_lines_in_file(filename)
         return line_count

     def __remote_No_of_Lines(self, filename, home_dir='~', working_dir = '~' ):
         """Ask the remote daemon.py for the line count of ``working_dir/filename``.

         Returns:
             The combined remote output converted to int, or -1 when that
             text is not an integer.
         """
         args = self.buildSSHLogin()
         # Class attributes must be reached through self inside a method.
         if self.PATHDELIM == '\\':
             args = args + self.WIN_RSA_KEY_ARGS
         args = args + ['python', 'daemon.py', '--home-dir', home_dir, '--number-of-lines-in-file', working_dir + '/' + filename]
         p = self.create_a_process(args)
         result = p.communicate()
         (boolean, message) = self._interpret_results(result, 'number')
         try:
             return int(message)
         except ValueError:
             # The reply was not a plain integer.
             return -1

     def __remote_deleteRemoteFile(self, filename, home_dir = '~',  working_dir='~'):
         """Ask the remote daemon.py to remove ``working_dir/filename``.

         Returns:
             True when the combined remote output contains '<<Success!>>'.
         """
         args = self.buildSSHLogin()
         # Class attributes must be reached through self inside a method.
         if self.PATHDELIM == '\\':
             args = args + self.WIN_RSA_KEY_ARGS
         args = args + ['python', 'daemon.py', '--home-dir', home_dir, '--remove-file', working_dir + '/' + filename]
         p = self.create_a_process(args)
         result = p.communicate()
         (boolean, message) = self._interpret_results(result, '<<Success!>>')
         return boolean


     def __remote_copy_file(self, source, target):
         """Copy local ``source`` to ``user@server:target`` with scp.

         Returns:
             True when scp wrote nothing to standard output.
         """
         user, server = self.getUserServer()
         scp_args = self.buildSSHLogin('scp')
         scp_args += [source, user + '@' + server + ':' + target]

         proc = self.create_a_process(scp_args)
         stdout_text = proc.communicate()[0]
         return stdout_text == ''

     def __remote_copy_file_back(self, source, target):
         """Copy ``user@server:source`` to local ``target`` with scp.

         The previous check searched the output for the empty string, which
         is contained in every string, so it always reported success. The
         exit status of the scp process is used instead.

         Returns:
             True when the scp process exited with status 0.
         """
         user, server = self.getUserServer()
         args = self.buildSSHLogin('scp')
         args += [user + '@' + server + ':' + source, target]

         p = self.create_a_process(args)
         # communicate() waits for the process to end, which sets returncode.
         p.communicate()
         return p.returncode == 0

     def __remote_DownloadFile(self, source, target):
         """Copy ``source`` back from the server and report whether ``target`` now exists locally."""
         self.__remote_copy_file_back(source, target)
         return path.exists(target)

     def __remote_createFile(self, file_name, home_dir='~', working_dir = '~'):
         """Upload ``file_name`` to the server and confirm that it arrived.

         ``self.Files[file_name]`` supplies the local sub-folder (index 0)
         and the remote sub-folder (index 1).

         Returns:
             True when the remote existence check succeeds afterwards.
         """
         local_path = self.MetaPathwaysDir + PATHDELIM + self.Files[file_name][0] + PATHDELIM + file_name
         remote_path = working_dir + '/' + self.Files[file_name][1] + '/' + file_name
         self.__remote_copy_file(local_path, remote_path)

         remote_check = working_dir + '/' + self.Files[file_name][1] + '/' + file_name
         return bool(self.__remote_doesFileExist(remote_check))

     def __remote_createFolder(self, folder_name, home_dir='~', working_dir = '~'):
         """Ask the remote daemon.py to create ``working_dir/folder_name``.

         Returns:
             True when the combined remote output contains '<<Success!>>'.
         """
         args = self.buildSSHLogin()
         # Class attributes must be reached through self inside a method.
         if self.PATHDELIM == '\\':
             args = args + self.WIN_RSA_KEY_ARGS
         args = args + ['python', 'daemon.py', '--home-dir', home_dir, '--create-sample-dir', working_dir + '/' + folder_name]
         p = self.create_a_process(args)
         result = p.communicate()
         (boolean, message) = self._interpret_results(result, '<<Success!>>')
         return boolean
     
     
     def __remote_doesFolderExist(self, folder_name, home_dir='~', working_dir = '~'):
         """Ask the remote daemon.py whether ``working_dir/folder_name`` exists.

         Returns:
             True when the combined remote output contains '<<Success!>>'.
         """
         args = self.buildSSHLogin()
         # Class attributes must be reached through self inside a method.
         if self.PATHDELIM == '\\':
             args = args + self.WIN_RSA_KEY_ARGS
         args += ['python', 'daemon.py', '--home-dir', home_dir, '--does-sample-dir-exist', working_dir + '/' + folder_name]
         p = self.create_a_process(args)
         result = p.communicate()
         (boolean, message) = self._interpret_results(result, '<<Success!>>')
         return boolean

     def __remote_doesFileExist(self, file_name, home_dir='~', working_dir = '~'):
         """Ask the remote daemon.py whether ``working_dir/file_name`` exists.

         Returns:
             True when the combined remote output contains '<<Success!>>'.
         """
         args = self.buildSSHLogin()
         # Class attributes must be reached through self inside a method.
         if self.PATHDELIM == '\\':
             args = args + self.WIN_RSA_KEY_ARGS
         args += ['python', 'daemon.py', '--home-dir', home_dir, '--does-file-exist', working_dir + '/' + file_name]
         p = self.create_a_process(args)
         result = p.communicate()
         (boolean, message) = self._interpret_results(result, '<<Success!>>')
         return boolean

     def __remote_doesFilePatternExist(self, file_pattern, home_dir='~', working_dir = '~'):
         """Ask the remote daemon.py whether anything matches ``working_dir/file_pattern``.

         Returns:
             True when the combined remote output contains '<<Success!>>'.
         """
         args = self.buildSSHLogin()
         # Class attributes must be reached through self inside a method.
         if self.PATHDELIM == '\\':
             args = args + self.WIN_RSA_KEY_ARGS
         args += ['python', 'daemon.py', '--home-dir', home_dir, '--does-file-pattern-exist', working_dir + '/' + file_pattern]
         p = self.create_a_process(args)
         result = p.communicate()
         (boolean, message) = self._interpret_results(result, '<<Success!>>')
         return boolean

     def safeDoubleQuotes(self, string) :
         """Return ``string`` wrapped in a pair of double quotes."""
         quote = '\"'
         return ''.join((quote, string, quote))
     def __remote_getFileNamesWithPattern(self, file_pattern, home_dir='~', working_dir = '~'):
         """Ask the remote daemon.py for the files matching ``working_dir/file_pattern``.

         The pattern is passed wrapped in double quotes.

         Returns:
             The non-blank, comma-separated items of the remote stdout.
         """
         args = self.buildSSHLogin()
         # Class attributes must be reached through self inside a method.
         if self.PATHDELIM == '\\':
             args = args + self.WIN_RSA_KEY_ARGS
         args += ['python', 'daemon.py', '--home-dir', home_dir, '--get-files-with-pattern', self.safeDoubleQuotes(working_dir + '/' + file_pattern)]
         p = self.create_a_process(args)
         result = p.communicate()
         return self._parse_array_results(result)

     def __remote_isDBFormatted(self, remote_DB_dir, dbname, algorithm, working_dir = '~'):
         """Check that every index file of a formatted database exists remotely.

         For each suffix of the chosen algorithm the pattern
         ``<remote_DB_dir>/<dbname>*.<suffix>`` must match on the server.

         Args:
             algorithm: 'LAST' or 'BLAST', compared case-insensitively.

         Returns:
             True when all patterns match; False when one is missing or the
             algorithm is not recognised.
         """
         suffixes_by_algorithm = {
             'LAST': ['des', 'sds', 'suf', 'bck', 'prj', 'ssp', 'tis'],
             'BLAST': ['psq', 'phr', 'pin'],
         }
         suffixes = suffixes_by_algorithm.get(algorithm.upper())
         if suffixes is None:
             # An unrecognised algorithm used to leave `suffixes` unbound and
             # crash with UnboundLocalError; report "not formatted" instead.
             return False

         dbnamePath = remote_DB_dir + '/' + dbname
         for suffix in suffixes:
             filepattern = dbnamePath + '*' + '.' + suffix
             if not self.__remote_doesFilePatternExist(filepattern, working_dir = working_dir):
                 return False

         return True
            
     #format the remote db
     def __remote_formatDB(self, dbname, algorithm, home_dir='~', working_dir= '~'):
         """Ask the remote daemon.py to format database ``dbname`` for ``algorithm``.

         ``working_dir`` is the value passed as ``--home-dir``; ``home_dir``
         is accepted but not used.

         Returns:
             True when the combined remote output contains '<<Success!>>'.
         """
         args = self.buildSSHLogin()
         # Class attributes must be reached through self inside a method.
         if self.PATHDELIM == '\\':
             args = args + self.WIN_RSA_KEY_ARGS
         args += ['python', 'daemon.py', '--home-dir', working_dir, '--format-database', dbname, '--algorithm', algorithm]

         p = self.create_a_process(args)
         result = p.communicate()
         (boolean, message) = self._interpret_results(result, '<<Success!>>')
         return boolean

     
     def printError(self, result):
         """Print every element of ``result`` inside an error banner when ``result[1]`` is not blank."""
         if not result[1].strip():
             return
         print("Remote Execution Error:")
         print("<-----------------------")
         for piece in result:
             print("    " + piece)
         print("----------------------->")
        
     def printResponse(self, result):
         """Print every element of ``result`` inside a response banner when ``result[0]`` is not blank."""
         if not result[0].strip():
             return
         print("Remote Execution Response:")
         print("<=======================")
         for piece in result:
             print("    " + piece)
         print("=======================>")
        
     def copy_file(self, source, target):
         """Copy local ``source`` to ``user@server:target``.

         With a backslash path delimiter the command is ``SCP`` plus
         ``WIN_RSA_KEY_ARGS``; otherwise it comes from ``buildSSHLogin('scp')``.

         Returns:
             True when the copy program wrote nothing (after stripping) to
             standard output.
         """
         user, server = self.getUserServer()
         destination = user + '@' + server + ':' + target
         # Class attributes must be reached through self inside a method.
         if self.PATHDELIM == '\\':
             args = [self.SCP] + self.WIN_RSA_KEY_ARGS + [source, destination]
         else:
             args = self.buildSSHLogin('scp')
             args += [source, destination]

         p = self.create_a_process(args)
         result = p.communicate()[0].strip()
         return result == ''
          
     def copy_file_back(self, source, target):
         """Copy a file from the server to local ``target``.

         With a backslash path delimiter the remote path is
         ``user@server:<source>`` and the command is ``SCP`` plus
         ``WIN_RSA_KEY_ARGS``; otherwise the remote path is
         ``user@server:~/<source>`` and the command comes from
         ``buildSSHLogin('scp')``.

         Returns:
             True when the copy program wrote nothing (after stripping) to
             standard output.
         """
         user, server = self.getUserServer()
         # Class attributes must be reached through self inside a method.
         if self.PATHDELIM == '\\':
             args = [self.SCP] + self.WIN_RSA_KEY_ARGS + [user + '@' + server + ':' + source, target]
         else:
             args = self.buildSSHLogin('scp')
             args += [user + '@' + server + ':~/' + source, target]

         p = self.create_a_process(args)
         result = p.communicate()[0].strip()
         return result == ''
     
     def __remote_Submit(self, J, working_dir='~'):
         """Submit job ``J`` through the remote daemon.py.

         ``J.a``, ``J.S``, ``J.m`` and ``J.d`` are passed as ``--submit-job``,
         ``--sample-name``, ``--algorithm`` and ``--dbname``. When the
         submission fails while ``submission_type`` is '0', the type is
         switched to '1' on this instance.

         Returns:
             ``(succeeded, str(result), command)`` where ``command`` is the
             space-joined argument list.
         """
         args = self.buildSSHLogin()
         # Class attributes must be reached through self inside a method.
         if self.PATHDELIM == '\\':
             args = args + self.WIN_RSA_KEY_ARGS

         args += ['python', 'daemon.py', '--home-dir', working_dir, '--submit-job',
                  J.a, '--sample-name', J.S, '--algorithm', J.m, '--dbname', J.d,
                  '--submit-string', "\"" + self.sub_string + "\"", '--submission-type', self.submission_type]
         command = ' '.join(args)

         p = self.create_a_process(args)
         result = p.communicate()
         (boolean, message) = self._interpret_results(result, '<<Success!>>')

         if boolean == False and self.submission_type == '0':
             self.submission_type = '1'

         return boolean, str(result), command

     
     def get_number_of_running_jobs(self, sample_name, algorithm):
         """Ask the sample's remote daemon.py how many jobs the user is running.

         The bare name ``user`` was undefined inside this method; the
         instance's ``self.user`` is passed instead. ``algorithm`` is
         accepted but not used.

         Returns:
             The remote stdout as an int, or 0 when stderr is not blank or
             stdout is not an integer.
         """
         args = self.buildSSHLogin()
         args += ['python', 'MetaPathways/' + sample_name + '/daemon.py', '--home-dir', '\'\'', '--get-number-of-running-jobs', self.user]
         p = self.create_a_process(args)
         result = p.communicate()
         if result[1].strip() != '':
             return 0
         try:
             return int(result[0].strip())
         except ValueError:
             return 0
     
     def databaseFiles(self, sourcedir, regPattern):
         """List entries of ``sourcedir`` whose full path matches ``regPattern``.

         Each returned name has everything up to the last '/' removed.
         """
         names = []
         for candidate in glob(sourcedir + PATHDELIM + '*'):
             if regPattern.search(candidate):
                 names.append(re.sub(r'.*\/', '', candidate))
         return names
     
     #def retrieve_results(sample_name, dbname):
     
     def create_a_process(self, args):
         """Start ``args`` as a subprocess with stdin, stdout and stderr piped and return the Popen object."""
         pipe = subprocess.PIPE
         return subprocess.Popen(args, stdin=pipe, stdout=pipe, stderr=pipe)
     
     
     class empty(object):
         """Plain object used as a holder for attributes set with setattr()."""
     
     def blastgrid(argv):
         """Copy result files back for every database name in ``argv``.

         NOTE(review): declared inside the class without ``self``, and the
         names ``empty``, ``MetaPathways``, ``sample_name``,
         ``db_output_suffix`` and ``copy_file_back`` are not defined in this
         function. They would have to exist at module level for this to
         run -- confirm whether this was meant to be module-level code.
         """
     
         # Turn the argv mapping into an object with one attribute per key.
         opts = empty()
         for key, value in argv.items():
             setattr(opts, key, value)
               
     
         for dbname in opts.dbnames: 
            # Local destination: <sample_name>/blast_results/
            target = opts.sample_name +  PATHDELIM + 'blast_results' + PATHDELIM 
            source = MetaPathways + sample_name + '/' + sample_name + '.' + db_output_suffix
     
            copy_file_back(source, target)
     
         #print "Completed calculation!"
     
             

     # Choose the remote-shell and copy programs: the system ssh/scp when
     # the path delimiter is '/', otherwise the bundled plink/pscp.
     if PATHDELIM != '/':
         SSH = PATHDELIM.join(('executables', 'win', 'bit64', 'plink'))
         SCP = PATHDELIM.join(('executables', 'win', 'bit64', 'pscp'))
     else:
         SSH = 'ssh'
         SCP = 'scp'
     
     def  __init__(self, gridParams):
         """Set defaults, then overlay every key/value of ``gridParams`` as an attribute.

         Fixes relative to the previous version:
           * ``server`` is now initialised (only ``service`` was), so the
             missing-server check no longer raises AttributeError;
           * the executables sub-folder uses ``bit`` when ``gridParams``
             supplies it and falls back to ``bits`` otherwise;
           * the missing user/server message goes to ``messagelogger`` when
             there is one, otherwise to stderr.

         Args:
             gridParams: mapping of attribute names to values.

         The process exits with status 0 after reporting a missing ``user``
         or ``server``.
         """
         self.user = None
         self.server = None
         self.service = None
         self.walltime = None
         self.remoteActiveFiles = {}
         self.remoteActiveFolders = {}
         self.remoteFormattedDBs = {}
         self.serverUp = False
         self.isaws = False
         self.awsparams = {}
         self.remote_home_dir = '~'
         self.working_directory = '~'
         self.base_output_folder = None
         self.max_parallel_jobs = 4
         self.os = 'mac'
         self.bits = 'bit64'
         self.type = 'ordinary'
         self.sub_string = ""

         self.performance = Performance()
         self.A = 1
         self.B = 1
         self.C = 1
         self.D = 1

         try:
             for key, value in gridParams.items():
                 setattr(self, key, value)
         except Exception:
             # Best effort: keep the defaults that are already set.
             print("ERROR : in  creating BlastService")

         logger = getattr(self, 'messagelogger', None)
         required = (
             (self.user, "ERROR: User for Grid service not specified\n"),
             (self.server, "ERROR: Service for Grid service not specified\n"),
         )
         for value, complaint in required:
             if not value:
                 if logger is not None:
                     logger.write(complaint)
                 else:
                     sys.stderr.write(complaint)
                 sys.exit(0)

         self.Folders = ['MetaPathways', 'MetaPathways/databases', 'MetaPathways/executables', 'MetaPathways/samples',
                         'MetaPathways/samples/.qstatdir']

         # Local folder holding the executables for this OS and word size.
         bit_dir = getattr(self, 'bit', self.bits)
         exe_dir = 'executables' + PATHDELIM + self.os + PATHDELIM + bit_dir
         # name -> [local sub-folder, remote sub-folder]
         self.Files = {
             'blastp': [exe_dir, 'MetaPathways/executables/'],
             'blastn': [exe_dir, 'MetaPathways/executables/'],
             'lastal': [exe_dir, 'MetaPathways/executables/'],
             'lastdb': [exe_dir, 'MetaPathways/executables/'],
             'makeblastdb': [exe_dir, 'MetaPathways/executables/'],
             'daemon.py': ['libs' + PATHDELIM + 'python_scripts', ''],
         }


     def isUp(self):
         """Tell whether the remote server passes the connectivity check.

         The check is made for self.user on self.server; the result is
         normalised to a plain True/False.
         """
         reachable = self.__remote_check_if_server_is_up(self.user, self.server)
         return bool(reachable)

     def set_sample_name(self, sample_name):
         """Store sample_name on the instance as self.sample_name."""
         self.sample_name = sample_name

     def set_base_output_folder(self, base_output_folder):
         """Store base_output_folder on the instance as self.base_output_folder.

         Other methods of this class build local file paths from this value.
         """
         self.base_output_folder = base_output_folder

     def submitJob(self, J):
         """Submit job J to this server after making sure its inputs are ready.

         J must expose the attributes S, d, a and m; here they are only used
         to label the diagnostics. self.submit(J) is expected to return an
         indexable whose item 0 is truthy on success, item 1 is the remote
         error message and item 2 is the command that was run.

         Returns True when the job was submitted; False when the inputs
         could not be prepared or the remote submission failed.
         """
         if not self.isInputReady(J):
             # One placeholder per value: the four job fields plus the server.
             # (The previous format string had only four and raised TypeError.)
             self.messagelogger.write(
                 "ERROR: Input not ready (%s %s %s %s %s)\n"
                 % (J.S, J.d, J.a, J.m, self.server))
             return False

         status = self.submit(J)
         if status[0]:
             return True

         # Single-argument print(...) behaves the same on Python 2 and 3.
         print("failed to submit " + "%s %s %s %s %s" % (J.S, J.d, J.a, J.m, self.server))
         # %s keeps these diagnostics from failing when a field is not a string.
         print("ERROR: Remote error message %s" % (status[1],))
         print("INFO: command : %s" % (status[2],))
         return False

     def submit(self, J):
         """Pass job J to the remote submission routine.

         self.working_directory is forwarded as the working directory and
         the routine's return value is handed back unchanged.
         """
         remote_cwd = self.working_directory
         return self.__remote_Submit(J, working_dir=remote_cwd)

     def areNewResultsAvailable(self, sample_names, algorithm):
         """Check whether any sample has result files waiting on the server.

         For each name in sample_names the remote sample folder is probed
         for files matching '*.<ALGORITHM>' (algorithm upper-cased).
         Probing stops at the first sample with a match.

         Returns True if a match was found, False otherwise (including when
         sample_names is empty).
         """
         samples_root = 'MetaPathways/samples/'
         return any(
             self.__remote_doesFilePatternExist(
                 '/'.join([samples_root, name, '*.' + algorithm.upper()]),
                 working_dir=self.working_directory)
             for name in sample_names)

     def isInputReady(self, J):
         """Stage on the remote server everything job J needs before submission.

         J supplies the sample name (J.S), the query split file name (J.a),
         the database name (J.d) and the algorithm (J.m).

         Steps, in order; the first failure is logged and returns False:
           1. ensure the sample folder and its '.qstatdir' subfolder exist
              under the remote working directory, creating them if needed;
           2. upload the query split file unless it is already present;
           3. upload the database file unless it is already present;
           4. format the database for the algorithm unless already formatted.

         Folders found or created are recorded in self.remoteActiveFolders,
         files uploaded by this method in self.remoteActiveFiles, and
         databases found or made formatted in self.remoteFormattedDBs, so
         later calls can skip the corresponding remote checks.

         Returns True when all inputs are in place, False otherwise.
         """
         sampleDir = 'MetaPathways/samples/' + J.S

         # check if the remote folders are active
         for target in (sampleDir, sampleDir + '/.qstatdir'):
             if target in self.remoteActiveFolders:
                 continue

             if self.__remote_doesFolderExist(target, working_dir = self.working_directory):
                 self.messagelogger.write("OK: Found already created remote folder \"%s\" in Server \"%s\"!\n" %(target, self.server))
                 self.remoteActiveFolders[target] = True
                 continue

             # The create call's own result is not trusted; existence is re-checked.
             self.__remote_createFolder(target, working_dir = self.working_directory)
             if not self.__remote_doesFolderExist(target, working_dir = self.working_directory):
                 self.messagelogger.write("ERROR: Cannot create remote folder \"%s\" in Server \"%s\"!\n" %(target, self.server))
                 return False
             self.remoteActiveFolders[target] = True
             self.messagelogger.write("SUCCESS: Successfully created remote folder \"%s\" in Server \"%s\"!\n" %(target, self.server))

         # upload the query split file
         source = self.base_output_folder +  PATHDELIM + J.S + PATHDELIM + 'blast_results' + PATHDELIM + 'grid' + PATHDELIM + 'split_batch' + PATHDELIM + J.a
         target  =  'MetaPathways/samples/' + J.S + '/' + J.a
         if J.a not in self.remoteActiveFiles and not self.__remote_doesFileExist(target, working_dir = self.working_directory):
             self.__remote_copy_file(source, self.working_directory + '/' + target)
             if not self.__remote_doesFileExist(target, working_dir = self.working_directory):
                 self.messagelogger.write("ERROR: Cannot upload query file \"%s\" to server \"%s\"!\n" %(J.a, self.server))
                 return False
             self.remoteActiveFiles[J.a] = True

         # upload the database file
         source = self.blast_db_folder +  PATHDELIM + J.d
         target  =  'MetaPathways/databases/'  + J.d
         if J.d not in self.remoteActiveFiles and not self.__remote_doesFileExist(target, working_dir = self.working_directory):
             if not path.exists(source):
                 self.messagelogger.write("ERROR: DB file \"%s\" for DB \"%s\" is missing, required to upload to server \"%s\"!\n" %(source, J.d, self.server))
                 return False

             self.messagelogger.write("STATUS: Uploading  DB file \"%s\" to server \"%s\"!\n" %(J.d, self.server))
             self.__remote_copy_file(source, self.working_directory + '/' + target)
             if not self.__remote_doesFileExist(target, working_dir = self.working_directory):
                 self.messagelogger.write("ERROR: Cannot upload DB file \"%s\" to server \"%s\"!\n" %(J.d, self.server))
                 return False
             # This branch runs right after a fresh, verified upload, so report
             # it as such (the old text claimed the file was already there).
             self.messagelogger.write("SUCCESS: Successfully uploaded DB file \"%s\" to server \"%s\"!\n" %(J.d, self.server))
             self.remoteActiveFiles[J.d] = True

         # make sure the database is formatted for this algorithm
         remote_DB_dir  ='MetaPathways/databases/'
         if J.d not in self.remoteFormattedDBs:
             if self.__remote_isDBFormatted(remote_DB_dir, J.d, J.m, working_dir = self.working_directory ):
                 self.messagelogger.write("OK: DB \"%s\" is already formatted on server \"%s\" for algorithm \"%s\"!\n" %(J.d, self.server, J.m))
                 self.remoteFormattedDBs[J.d] = True
             else:
                 self.messagelogger.write("WARNING: DB \"%s\" is not formatted on server \"%s\" for algorithm \"%s\"!\n" %(J.d, self.server, J.m))
                 # Formatting counts only if the follow-up check confirms it.
                 if self.__remote_formatDB(remote_DB_dir +'/' + J.d, J.m, working_dir = self.working_directory)  and  self.__remote_isDBFormatted(remote_DB_dir, J.d, J.m, working_dir = self.working_directory ):
                     self.messagelogger.write("SUCCESS: Successfully formatted  DB \"%s\" on server \"%s\" for algorithm \"%s\"!\n" %(J.d, self.server, J.m))
                     self.remoteFormattedDBs[J.d] = True
                 else:
                     self.messagelogger.write("ERROR: Failed to format  DB \"%s\"  on server \"%s\" for algorithm \"%s\"!\n" %(J.d, self.server, J.m))
                     return False

         return True
           

     def isReadyToSubmit(self, load):
         """Decide whether a new job may be submitted now, with back-off.

         self.A is a countdown of calls still to be skipped and self.B the
         current back-off length. Each call lowers A by one (never below 1);
         while A is still above 1 the call returns False without touching
         the server. Otherwise the server set-up and the queue load are
         checked: on failure B is doubled (capped at 100) and A restarts
         from B; on success B is reset to A.

         Returns True only when the server is set up and the queue is not
         full for the given load.
         """
         self.A = max(self.A - 1, 1)
         if self.A > 1:
             return False

         if self.isSetUp() and not self.isQFull(load):
             self.B = self.A
             return True

         # Server not usable or queue full: wait twice as long next time.
         self.B = min(2 * self.B, 100)
         self.A = self.B
         return False
      
     def isReadyToHarvest(self, sample_names, algorithm):
         """Decide whether results should be harvested now, with back-off.

         self.C is a countdown of calls still to be skipped and self.D the
         current back-off length. Each call lowers C by one (never below 1);
         while C is still above 1 the call returns False without touching
         the server. Otherwise the server set-up is checked and new results
         are looked for: on failure D is doubled (capped at 100) and C
         restarts from D; on success D is reset to C.

         Returns True only when the server is set up and new results are
         available for the given samples and algorithm.
         """
         self.C = max(self.C - 1, 1)
         if self.C > 1:
             return False

         if self.isSetUp() and self.areNewResultsAvailable(sample_names, algorithm):
             self.D = self.C
             return True

         # Nothing to collect (or server unusable): wait twice as long next time.
         self.D = min(2 * self.D, 100)
         self.C = self.D
         return False

     def splitResultName(self, _a_result):
         """Break a result file name into its three dot-separated parts.

         Each part is stripped of surrounding whitespace. harvest() unpacks
         the parts as (split name, database name, algorithm).

         Returns the 3-tuple of parts, or (None, None, None) when the name
         does not consist of exactly three parts.
         """
         parts = tuple(piece.strip() for piece in _a_result.split('.'))
         return parts if len(parts) == 3 else (None, None, None)

     def job_Was_Submitted_By_Current_Server(self, sample_name, split_name, dbname, algorithm, list_jobs_submitted):
         """Tell whether this server is recorded as having been given a job.

         list_jobs_submitted is a nested mapping keyed, level by level, by
         sample name, database name, split name and algorithm; the innermost
         container holds server names.

         Returns True only when every level is present and self.server is
         in the innermost container.
         """
         level = list_jobs_submitted
         # Note the nesting order: database name comes before split name.
         for key in (sample_name, dbname, split_name, algorithm):
             if key not in level:
                 return False
             level = level[key]
         return self.server in level


     def harvest(self, sample_names, algorithm, list_jobs_submitted):
         """Download finished result files from the server and report them.

         For every sample the remote sample folder is listed for result
         files of the given algorithm. A result is collected only when
           * its name splits into (split name, database name, algorithm),
           * list_jobs_submitted records this server for that job,
           * the download succeeds,
           * local and remote line counts agree, and
           * the remote copy could then be deleted.
         Results failing the line-count or delete step stay on the server
         and are simply not reported by this call.

         Returns a list of Job objects, one per collected result.
         """
         harvested = []
         remote_Samples_dir = 'MetaPathways/samples/'
         for sample_name in sample_names:
             filepattern = remote_Samples_dir + '/' + sample_name + '/' + '__DELIMITER__' + algorithm
             _results = self.__remote_getFileNamesWithPattern(filepattern, working_dir = self.working_directory)
             for _a_result in _results:
                 # Parse into a separate name: rebinding `algorithm` here used
                 # to corrupt the file pattern for every following sample (and
                 # raised TypeError once a malformed name turned it into None).
                 split_name, dbname, result_algorithm = self.splitResultName(_a_result)
                 if split_name is None:
                     continue
                 if not self.job_Was_Submitted_By_Current_Server(sample_name, split_name, dbname, result_algorithm, list_jobs_submitted):
                     continue

                 remoteFile = remote_Samples_dir + '/' + sample_name + '/' + _a_result
                 source = self.working_directory + '/' + remoteFile
                 localTargetFile = self.base_output_folder + PATHDELIM + sample_name + PATHDELIM + 'blast_results' + PATHDELIM + 'grid' + PATHDELIM + 'split_results' + PATHDELIM + _a_result

                 if not self.__remote_DownloadFile(source, localTargetFile):
                     self.messagelogger.write("ERROR: Failed to download file \"%s\"  from server \"%s\"!\n" %(source,  self.server))
                     continue

                 # Only accept the download if it is as long as the remote file.
                 nL = self.__local_No_of_Lines(localTargetFile)
                 nR = self.__remote_No_of_Lines(remoteFile, working_dir = self.working_directory)
                 if nR != nL:
                     continue

                 if self.__remote_deleteRemoteFile(remoteFile, working_dir = self.working_directory):
                     harvested.append(Job(sample_name, dbname, split_name, result_algorithm))

         return harvested
         
     def isQFull(self, size):
         """Tell whether size has reached the allowed number of parallel jobs.

         self.max_parallel_jobs is converted with int() before comparing,
         so it may be held as a numeric string.
         """
         capacity = int(self.max_parallel_jobs)
         return size >= capacity



     def isSetUp(self):
         """Make sure the remote server is reachable and fully provisioned.

         Stages, in order; the first failure is logged and returns False:
           1. the server must pass isUp();
           2. 'daemon.py' is copied into the remote home directory unless
              already recorded in self.remoteActiveFiles;
           3. every folder in self.Folders must exist under the working
              directory, and is created when missing;
           4. every file in self.Files must exist remotely, and is
              created/uploaded when missing.

         Folders and files confirmed here are recorded in
         self.remoteActiveFolders / self.remoteActiveFiles so that repeated
         calls skip them.

         Returns True when all stages succeed.
         """
         if not self.isUp():
             self.messagelogger.write('ERROR: Cannot connect to  Server "%s"! Perhaps it is down!\n' % self.server)
             return False

         # Stage: copy the daemon.py file
         remote_daemon = 'daemon.py'
         local_daemon = PATHDELIM.join([self.MetaPathwaysDir, 'libs', 'python_scripts', 'daemon.py'])
         if remote_daemon not in self.remoteActiveFiles:
             uploaded = self.__remote_copy_file(local_daemon, self.remote_home_dir + '/' + remote_daemon)
             if not uploaded:
                 self.messagelogger.write('ERROR: Could not upload daemon file to  in Server "%s"!\n' % self.server)
                 return False
             self.messagelogger.write('SUCCESS: Successfully uploaded the daemon file to  in Server "%s"!\n' % self.server)
             self.remoteActiveFiles[remote_daemon] = True

         # Stage: create the folders under the working directory
         for folder in self.Folders:
             if folder in self.remoteActiveFolders:
                 continue
             self.messagelogger.write('MESSAGE: Checking remote folder "%s" in Server "%s"!\n' % (folder, self.server))

             if self.__remote_doesFolderExist(folder, working_dir=self.working_directory):
                 self.messagelogger.write('STATUS: Found existing remote folder "%s" in Server "%s"!\n' % (folder, self.server))
                 self.remoteActiveFolders[folder] = True
                 continue

             if not self.__remote_createFolder(folder, working_dir=self.working_directory):
                 self.messagelogger.write('ERROR: Cannot create remote folder "%s" in Server "%s"!\n' % (folder, self.server))
                 return False
             self.remoteActiveFolders[folder] = True
             self.messagelogger.write('SUCCESS: Successfully created remote folder "%s" in Server "%s"!\n' % (folder, self.server))

         # Stage: make sure every required file is present remotely
         for name in self.Files:
             if name in self.remoteActiveFiles:
                 continue

             remote_path = self.Files[name][1] + '/' + name
             if self.__remote_doesFileExist(remote_path, working_dir=self.working_directory):
                 self.remoteActiveFiles[name] = True
                 continue

             self.messagelogger.write('STATUS: Creating/Uploading remote file "%s" in Server "%s"!\n' % (name, self.server))
             if not self.__remote_createFile(name, working_dir=self.working_directory):
                 self.messagelogger.write('ERROR: Cannot create/upload remote file "%s" in Server "%s"!\n' % (name, self.server))
                 return False
             self.remoteActiveFiles[name] = True
             self.messagelogger.write('SUCCESS: Successfully created/uploaded remote file "%s" in Server "%s"!\n' % (name, self.server))

         return True


     def deleteRemoteSampleFolders(self, samples):
         """Ask the server to remove the remote folders of the given samples.

         Nothing is attempted unless isSetUp() succeeds. The removal is
         delegated to the remote helper, run against self.working_directory,
         and its outcome is written to the message logger: a success line,
         or two warnings reminding the user to clean up by hand.
         """
         if  self.isSetUp():
            # the helper's truthy/falsy result selects which message is logged
            if self.__remote_Remove_Sample_Folders(samples, working_dir = self.working_directory):
                self.messagelogger.write("SUCCESS: Successfully removed sample folders in Server \"%s\"!\n" %(self.server))
            else:
                self.messagelogger.write("WARNING: Faild to remove sample folders in Server \"%s\"!\n" %(self.server))
                self.messagelogger.write("WARNING: Please DO NOT forget to remove the stale folders manually in Server \"%s\"!\n" %(self.server))