Example #1
    def setup(
        self,
        login=None,
        password=None,
        service_url="https://bramka.play.pl",
        login_url="https://logowanie.play.pl/p4-idp2/LoginForm.do",
        logout_url="https://logowanie.play.pl/p4-idp2/LogoutUser",
    ):
        self.SERVICE_URL = service_url
        self.LOGIN_URL = login_url
        self.LOGOUT_URL = logout_url
        self.MY_PHONE_NUMBER = login
        self.MY_PASSWORD = password

        web.config("readonly_controls_writeable", True)
        web.agent(self.MY_HTTP_AGENT)

        web.go(self.SERVICE_URL)
        web.submit()
        web.code(200)
        web.formvalue("loginForm", "login", self.MY_PHONE_NUMBER)
        web.formvalue("loginForm", "password", self.MY_PASSWORD)
        web.submit()
        web.code(200)
        self._retry_find("editableSmsComposeForm", 5)
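
The _retry_find helper is referenced above but not shown. A minimal sketch of what it might look like, assuming twill's find raises TwillAssertionError on a miss (the retry strategy here is an editorial assumption, not the original implementation):

    def _retry_find(self, pattern, tries):
        # Hypothetical helper: retry web.find() a few times, reloading the
        # page between attempts, in case the SMS compose form loads late.
        import time
        from twill.errors import TwillAssertionError
        for attempt in range(tries):
            try:
                web.find(pattern)
                return
            except TwillAssertionError:
                time.sleep(1)
                web.reload()
        raise TwillAssertionError("%s not found after %d tries" % (pattern, tries))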
Example #2
def main():
#--------------------------------------
    twillc.agent('Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:28.0)' +
                 ' Gecko/20100101 Firefox/28.0')

    umcu = UMCUWeb()
    umcu.login_interactive()

    gmail = GMailAccount()
    gmail.login_interactive()

    hist = CardHoldHistory()
    
    print '\nBegin monitoring holds ...'
    print 'Notifications will be sent to ' + gmail.username + '\n\n'

    # def relative_day(date):
    #     delta = datetime.datetime.today() - date
    #     if delta.days == 0:
    #         return 'today'
    #     elif delta.days == 1:
    #         return 'yesterday'
    #     else:
    #         return ('%d days ago' % delta.days)


    # Start monitoring holds
    while True:
        umcu.login() 
        posted = umcu.get_posted_holds()

        # Are there duplicate posts?
        duplicates = [k for k, v in Counter(posted).items() if v > 1]
        if len(duplicates) > 0:
            msg = 'The following holds may have been posted multiple times to your account:\n'
            for hold in duplicates:
                msg += '    ' + '  '.join(hold.split('|')) + '\n'
            msg += '(notification by cardamon)'
            gmail.send_email([gmail.username], 'Duplicate Holds', msg)

        new_holds = hist.merge(posted)

        if len(new_holds) > 0:
            for hold in new_holds:
                print '  Notifying: ' + hold
                info = hold.split('|')
                subject = '%s: %s' % (info[-1], info[2])

                msg = '''%s on %s for account %s''' % (info[0], info[3], info[1])
                msg += '\n(notification by cardamon)'
                gmail.send_email([gmail.username], subject, msg)
        
        time.sleep(15*60) # Refresh every 15 mins
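
The duplicate check above is the standard collections.Counter idiom. A standalone illustration (the pipe-delimited record layout is invented for the example, not the actual UMCU format):

from collections import Counter

holds = ['05/01|1234|COFFEE SHOP|5.00|May 1',
         '05/01|1234|COFFEE SHOP|5.00|May 1',
         '05/02|1234|GROCERY|42.10|May 2']
duplicates = [k for k, v in Counter(holds).items() if v > 1]
# duplicates == ['05/01|1234|COFFEE SHOP|5.00|May 1']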
Example #3
def configure_twill(tc):
    """Configure twill to be used by LinkChecker.
    Note that there is no need to set a proxy since twill uses the same
    ones (provided from urllib) as LinkChecker does.
    """
    # make sure readonly controls are writeable (might be needed)
    tc.config("readonly_controls_writeable", True)
    # fake IE 6.0 to talk sense into some sites (eg. SourceForge)
    tc.agent("Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)")
    # tell twill to shut up
    tc.OUT = dummy.Dummy()
    from twill import browser
    browser.OUT = dummy.Dummy()
    # set debug level
    if log.is_debug(LOG_CHECK):
        tc.debug("http", 1)
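
The dummy.Dummy object above is LinkChecker's null output sink. A minimal stand-in, assuming all it has to do is silently absorb whatever twill writes to it:

class Dummy(object):
    # Null object: swallows any attribute access or call so that
    # twill's status output has somewhere harmless to go.
    def __getattr__(self, name):
        return self
    def __call__(self, *args, **kwargs):
        return self
    def write(self, *args):
        pass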
Example #5
def goplurk(ccontent):
    # 'ccontent' was referenced as an undefined global in the original;
    # taking it as a parameter makes the function self-contained.
    from twill.commands import go, showforms, fv, submit, agent
    from twill import get_browser
    b = get_browser()
    b._browser._factory.is_html = True  # force the response to be parsed as HTML
    agent('moz17')
    go("http://www.plurk.com/")
    showforms()
    fv("1", "nick_name", "PUT_YOUR_NICKNAME")
    fv("1", "password", "PUT_YOUR_PASSWORD")
    submit('')
    go("http://www.plurk.com/m")
    showforms()
    fv("1", "content", ccontent)
    submit('submit_plurk')
    showforms()
    return
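
With ccontent promoted to a parameter (an editorial assumption; the original referenced it as an undefined name), a call would look like:

goplurk("first plurk posted via twill")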
Example #6
def annotate(params, proteins, \
             url="http://signalfind.org/tatfind.html", force=False):
    """
    Interfaces with the TatFind web service at (http://signalfind.org/tatfind.html)
    to predict if protein sequences contain Twin-Arginine Translocation (Tat)
    signal peptides.
    """
    # set the user-agent so web services can block us if they want ... :/
    python_version = sys.version.split()[0]
    agent("Python-urllib/%s (twill; inmembrane)" % python_version)

    outfn = 'tatfind.out'
    log_stderr("# TatFind(web) %s > %s" % (params['fasta'], outfn))

    if not force and os.path.isfile(outfn):
        log_stderr("# -> skipped: %s already exists" % outfn)
        fh = open(outfn, 'r')
        proteins = parse_tatfind_output(fh, proteins)
        fh.close()
        return proteins

    # dump extraneous output into this blackhole so we don't see it
    if not __DEBUG__: twill.set_output(StringIO.StringIO())

    go(url)
    if __DEBUG__: showforms()
    formfile("1", "seqFile", params["fasta"])
    submit()
    if __DEBUG__: show()

    tatfind_output = show()
    if __DEBUG__: log_stderr(tatfind_output)

    # write raw TatFind output to a file
    fh = open(outfn, 'w')
    fh.write(tatfind_output)
    fh.close()

    proteins = parse_tatfind_output(tatfind_output.split("\n"), proteins)

    return proteins
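
The twill calls above follow the usual pattern for driving a single-form page: navigate, attach the file to the first form, submit, then read the page back with show(). The same flow in isolation, against a hypothetical upload endpoint:

from twill.commands import go, formfile, submit, show

go("http://example.org/upload")          # hypothetical endpoint
formfile("1", "seqFile", "input.fasta")  # first form on the page, file field
submit()
html = show()                            # show() returns the current page's HTML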
Example #8
    def setup(
        self,
        login=None,
        password=None,
        service_url="/portal/map/map/message_box",
        login_url="http://www.orange.pl/zaloguj.phtml",
    ):

        self.SERVICE_URL = service_url
        self.LOGIN_URL = login_url
        self.MY_PHONE_NUMBER = login
        self.MY_PASSWORD = password

        web.agent(self.MY_HTTP_AGENT)
        web.go(self.LOGIN_URL)
        web.code(200)
        web.formvalue("loginForm", "login", self.MY_PHONE_NUMBER)
        web.formvalue("loginForm", "password", self.MY_PASSWORD)
        web.submit()
        web.code(200)
        web.find(self.SERVICE_URL)
Example #9
    def setup(
        self,
        login=None,
        password=None,
        service_url="/myv/messaging/webtext/",
        login_url="https://www.vodafone.ie/myv/services/login/index.jsp",
        logout_url="/myv/services/logout/Logout.shtml",
    ):

        self.SERVICE_URL = service_url
        self.LOGIN_URL = login_url
        self.LOGOUT_URL = logout_url
        self.MY_PHONE_NUMBER = login
        self.MY_PASSWORD = password

        web.agent(self.MY_HTTP_AGENT)
        web.go(self.LOGIN_URL)
        web.code(200)
        web.formvalue("Login", "username", self.MY_PHONE_NUMBER)
        web.formvalue("Login", "password", self.MY_PASSWORD)
        web.submit()
        web.code(200)
        web.find(self.SERVICE_URL)
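
One caveat for the web.find(self.SERVICE_URL) checks in these setup methods: twill's find treats its argument as a regular expression, so URL metacharacters such as '.' and '?' match loosely. Escaping the URL makes the check literal:

import re
web.find(re.escape(self.SERVICE_URL))  # match the URL literally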
Example #10
# find/import twill
lib_dir = os.path.join(scripts_dir, "..", "lib")
eggs_dir = os.path.join(scripts_dir, "..", "eggs",
                        "py%s-noplatform" % sys.version[:3])
sys.path.append(lib_dir)
sys.path.append(eggs_dir)
import pkg_resources
pkg_resources.require("twill")
import twill
import twill.commands as tc

# default timeout for twill browser is never
socket.setdefaulttimeout(300)

# user-agent
tc.agent("Mozilla/5.0 (compatible; check_galaxy/0.1)")
tc.config('use_tidy', 0)


class Browser:
    def __init__(self):
        self.server = server
        self.maint = maint
        self.tool = None
        self.tool_opts = None
        self.id = None
        self.status = None
        self.check_file = None
        self.hid = None
        self.cookie_jar = os.path.join(var_dir, "cookie_jar")
        dprint("cookie jar path: %s" % self.cookie_jar)
Example #11
        if debug:
            print "Specified -n, will create a new history"
        new_history = True
    else:
        usage()

# state information
var_dir = os.path.join( os.path.expanduser('~'), ".check_galaxy", server )
if not os.access( var_dir, os.F_OK ):
    os.makedirs( var_dir, 0700 )

# default timeout for twill browser is never
socket.setdefaulttimeout(300)

# user-agent
tc.agent("Mozilla/5.0 (compatible; check_galaxy/0.1)")
tc.config('use_tidy', 0)


class Browser:
    def __init__(self):
        self.server = server
        self.tool = None
        self.tool_opts = None
        self._hda_id = None
        self._hda_state = None
        self._history_id = None
        self.check_file = None
        self.cookie_jar = os.path.join( var_dir, "cookie_jar" )
        dprint("cookie jar path: %s" % self.cookie_jar)
        if not os.access(self.cookie_jar, os.R_OK):
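
The excerpt cuts off mid-branch, but twill ships its own cookie-persistence commands, which is presumably what the missing body wires up:

import twill.commands as tc

tc.clear_cookies()             # start with an empty jar
tc.load_cookies("cookie_jar")  # restore cookies saved by an earlier run
tc.save_cookies("cookie_jar")  # persist the current session to disk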
Example #12
def annotate(params, proteins, \
             force=False):
    """
    DEPRECATED: The TMB-HUNT server appears to be permanently offline.

    Uses the TMB-HUNT web service
    (http://bmbpcu36.leeds.ac.uk/~andy/betaBarrel/AACompPred/aaTMB_Hunt.cgi) to
    predict if proteins are outer membrane beta-barrels.

    NOTE: In my limited testing, TMB-HUNT tends to perform very poorly in
          terms of false positives and false negatives. I'd suggest using only
          BOMP.
    """
    # TODO: automatically split large sets into multiple jobs
    #       TMB-HUNT will only take 10000 seqs at a time
    if len(proteins) >= 10000:
        log_stderr(
            "# ERROR: TMB-HUNT(web): can't take more than 10,000 sequences.")
        return

    # set the user-agent so web services can block us if they want ... :/
    python_version = sys.version.split()[0]
    agent("Python-urllib/%s (twill; inmembrane)" % python_version)

    out = 'tmbhunt.out'
    log_stderr("# TMB-HUNT(web) %s > %s" % (params['fasta'], out))

    if not force and os.path.isfile(out):
        log_stderr("# -> skipped: %s already exists" % out)
        return parse_tmbhunt(proteins, out)

    # dump extraneous output into this blackhole so we don't see it
    if not __DEBUG__: twill.set_output(StringIO.StringIO())

    go("http://bmbpcu36.leeds.ac.uk/~andy/betaBarrel/AACompPred/aaTMB_Hunt.cgi"
       )
    if __DEBUG__: showforms()

    # read up the FASTA format seqs
    fh = open(params['fasta'], 'r')
    fasta_seqs = fh.read()
    fh.close()

    # fill out the form
    fv("1", "sequences", fasta_seqs)

    submit()
    if __DEBUG__: showlinks()

    # small jobs will lead us straight to the results, big jobs
    # go via a 'waiting' page which we skip past if we get it
    job_id = None
    try:
        # we see this with big jobs
        result_table_url = follow(
            "http://www.bioinformatics.leeds.ac.uk/~andy/betaBarrel/AACompPred/tmp/tmp_output.*.html"
        )
        job_id = result_table_url.split('tmp_output')[-1:][0].split('.')[0]
    except:
        # small jobs take us straight to the html results table
        pass

    # parse the job_id from the url, since due to a bug in
    # TMB-HUNT the link on the results page from large jobs is wrong
    if not job_id:
        job_id = follow("Full results").split('/')[-1:][0].split('.')[0]
    log_stderr(
        "# TMB-HUNT(web) job_id is: %s <http://www.bioinformatics.leeds.ac.uk/~andy/betaBarrel/AACompPred/tmp/tmp_output%s.html>"
        % (job_id, job_id))

    # polling until TMB-HUNT finishes
    # TMB-HUNT advises that 4000 sequences take ~10 mins
    # we poll a little faster than that
    polltime = (len(proteins) * 0.1) + 2
    while True:
        log_stderr("# TMB-HUNT(web): waiting another %i sec ..." % (polltime))
        time.sleep(polltime)
        try:
            go("http://bmbpcu36.leeds.ac.uk/~andy/betaBarrel/AACompPred/tmp/%s.txt"
               % (job_id))
            break
        except:
            polltime = polltime * 2

        if polltime >= 7200:  # 2 hours
            log_stderr("# TMB-HUNT error: Taking too long.")
            return

    txt_out = show()

    # write raw TMB-HUNT results
    fh = open(out, 'w')
    fh.write(txt_out)
    fh.close()

    return parse_tmbhunt(proteins, out)
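
TMB-HUNT above and BOMP below both poll with a doubling delay capped at two hours. The same backoff idiom in isolation, with a hypothetical is_done callable:

import time

def poll_until_done(is_done, first_wait=10, max_wait=7200):
    # Exponential backoff: check, sleep, double the wait, give up at the cap.
    wait = first_wait
    while wait < max_wait:
        if is_done():
            return True
        time.sleep(wait)
        wait *= 2
    return False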
Example #13
def annotate(params, proteins, \
             url="http://services.cbu.uib.no/tools/bomp/", force=False):
    """
    Uses the BOMP web service (http://services.cbu.uib.no/tools/bomp/) to
    predict if proteins are outer membrane beta-barrels.
    """
    # set the user-agent so web services can block us if they want ... :/
    python_version = sys.version.split()[0]
    agent("Python-urllib/%s (twill; inmembrane/%s)" %
          (python_version, inmembrane.__version__))

    bomp_out = 'bomp.out'
    log_stderr("# BOMP(web) %s > %s" % (params['fasta'], bomp_out))

    if not force and os.path.isfile(bomp_out):
        log_stderr("# -> skipped: %s already exists" % bomp_out)
        bomp_categories = {}
        fh = open(bomp_out, 'r')
        for l in fh:
            words = l.split()
            bomp_category = int(words[-1:][0])
            seqid = parse_fasta_header(l)[0]
            proteins[seqid]['bomp'] = bomp_category
            bomp_categories[seqid] = bomp_category
        fh.close()
        return bomp_categories

    # dump extraneous output into this blackhole so we don't see it
    if not __DEBUG__: twill.set_output(StringIO.StringIO())

    go(url)
    if __DEBUG__: showforms()
    formfile("1", "queryfile", params["fasta"])
    submit()
    if __DEBUG__: show()

    # extract the job id from the page
    links = showlinks()
    job_id = None
    for l in links:
        if l.url.find("viewOutput") != -1:
            # grab job id from "viewOutput?id=16745338"
            job_id = int(l.url.split("=")[1])

    if __DEBUG__: log_stderr("BOMP job id: %d" % job_id)

    if not job_id:
        # something went wrong
        log_stderr("# BOMP error: Can't find job id")
        return

    # parse the HTML table and extract categories
    go("viewOutput?id=%i" % (job_id))

    polltime = 10
    log_stderr("# Waiting for BOMP to finish .")
    while True:
        try:
            find("Not finished")
            log_stderr(".")
        except:
            # Finished ! Pull down the result page.
            log_stderr(". done!\n")
            go("viewOutput?id=%i" % (job_id))
            if __DEBUG__: log_stderr(show())
            break

        # Not finished. We keep polling for a time until
        # we give up
        time.sleep(polltime)
        polltime = polltime * 2
        if polltime >= 7200:  # 2 hours
            log_stderr("# BOMP error: Taking too long.")
            return
        go("viewOutput?id=%i" % (job_id))
        if __DEBUG__: log_stderr(show())

    bomp_html = show()
    if __DEBUG__: log_stderr(bomp_html)

    # Results are in the only <table> on this page, formatted like:
    # <tr><th>gi|107836852|gb|ABF84721.1<th>5</tr>
    soup = BeautifulSoup(bomp_html)
    bomp_categories = {}  # dictionary of {name, category} pairs
    for tr in soup.findAll('tr')[1:]:
        n, c = tr.findAll('th')
        name = parse_fasta_header(n.text.strip())[0]
        category = int(c.text)
        bomp_categories[name] = category

    # write BOMP results to a tab delimited file
    fh = open(bomp_out, 'w')
    for k, v in bomp_categories.iteritems():
        fh.write("%s\t%i\n" % (k, v))
    fh.close()

    if __DEBUG__: log_stderr(str(bomp_categories))

    # label proteins with bomp classification (int) or False
    for name in proteins:
        if "bomp" not in proteins[name]:
            if name in bomp_categories:
                category = int(bomp_categories[name])
                proteins[name]['bomp'] = category
            else:
                proteins[name]['bomp'] = False

    if __DEBUG__: log_stderr(str(proteins))

    return bomp_categories

    """
def annotate(params, proteins, \
             url="http://rbf.bioinfo.tw/"+
                 "~sachen/OMPpredict/"+
                 "TMBETADISC-RBF-Content.html", force=False):
  """
  Interfaces with the TMBETADISC-RBF web service at
  (http://rbf.bioinfo.tw/~sachen/OMPpredict/TMBETADISC-RBF.php)
  to predict if a protein sequence is likely to be an outer membrane beta-barrel.
  
  Note that the default URL we use is different from the regular form used
  by web browsers, since we need to bypass some AJAX fun.
  """
  # TODO: automatically split large sets into multiple jobs,
  #       since TMBETADISC seems unable to take more than
  #       ~5000 seqs at a time
  if len(proteins) >= 5000:
    log_stderr("# ERROR: TMBETADISC-RBF(web): tends to fail with > ~5000 sequences.")
    return
  
  # set the user-agent so web services can block us if they want ... :/
  python_version = sys.version.split()[0]
  agent("Python-urllib/%s (twill; inmembrane)" % python_version)
  
  outfn = 'tmbetadisc-rbf.out'
  log_stderr("# TMBETADISC-RBF(web) %s > %s" % (params['fasta'], outfn))
  
  if not force and os.path.isfile(outfn):
    log_stderr("# -> skipped: %s already exists" % outfn)
    fh = open(outfn, 'r')
    proteins = parse_tmbetadisc_output(fh.read(), proteins)
    fh.close()
    return proteins
  
  # dump extraneous output into this blackhole so we don't see it
  if not __DEBUG__: twill.set_output(StringIO.StringIO())
  
  go(url)
  if __DEBUG__: showforms()
  formfile("1", "userfile", params["fasta"])
  fv("1", "format", "file")

  # set the user defined method
  method_map = {"aa":"Amino Acid Composition",
                "dp":"Depipetide Composition",
                "aadp":"Amino Acid & Depipetide Composition",
                "pssm":"PSSM"}
  # default to amino acid composition so 'method' is always defined
  # (the original raised NameError when tmbetadisc_rbf_method was unset)
  method = method_map["aa"]
  if dict_get(params, 'tmbetadisc_rbf_method'):
    try:
      method = method_map[params['tmbetadisc_rbf_method']]
    except KeyError:
      log_stderr("# ERROR: Invalid setting for tmbetadisc_rbf_method. "
                 "Must be aa, dp, aadp or pssm.")
      sys.exit()

  #fv("1", "select", "Amino Acid Composition")
  #fv("1", "select", "Depipetide Composition")
  #fv("1", "select", "Amino Acid & Depipetide Composition")
  #fv("1", "select", "PSSM")
  fv("1", "select", method)
  
  submit()
  
  waiting_page = show()
  if __DEBUG__: log_stderr(waiting_page)

  for l in waiting_page.split('\n'):
    if l.find("TMBETADISC-RBF-action.php?UniqueName=") != -1:
      result_url = l.split("'")[1]

  time.sleep(5)
  
  go(result_url)
  
  output = show()
  if __DEBUG__: log_stderr(output)
  
  # write raw output to a file
  fh = open(outfn, 'w')
  fh.write(output)
  fh.close()
  
  proteins = parse_tmbetadisc_output(output, proteins) 
  
  return proteins
Example #15
def annotate(params, proteins, \
                   url="http://psfs.cbrc.jp/tmbeta-net/", \
                   category='OM(barrel)',
                   force=False):
    """
  Uses the TMBETA-NET web service (http://psfs.cbrc.jp/tmbeta-net/) to
  predict strands of outer membrane beta-barrels.
  
  By default, category='OM(barrel)' means prediction will only be run
  on proteins in the set with this category property. To process all
  proteins, change category to None.

  These keys are added to the proteins dictionary: 
    'tmbeta_strands' - a list of lists with paired start and end 
                       residues of each predicted strand. 
                       (eg [[3,9],[14,21], ..etc ])
  """

    # set the user-agent so web services can block us if they want ... :/
    python_version = sys.version.split()[0]
    agent("Python-urllib/%s (twill; inmembrane)" % python_version)

    outfile = 'tmbeta_net.out'
    log_stderr("# TMBETA-NET(web) %s > %s" % (params['fasta'], outfile))

    tmbeta_strands = {}
    if not force and os.path.isfile(outfile):
        log_stderr("# -> skipped: %s already exists" % outfile)
        fh = open(outfile, 'r')
        tmbeta_strands = json.loads(fh.read())
        fh.close()
        for seqid in tmbeta_strands:
            proteins[seqid]['tmbeta_strands'] = tmbeta_strands[seqid]

        return tmbeta_strands

    # dump extraneous output into this blackhole so we don't see it
    if not __DEBUG__: twill.set_output(StringIO.StringIO())

    for seqid in proteins:

        # only run on sequences which match the category filter
        if force or \
           (category == None) or \
           (dict_get(proteins[seqid], 'category') == category):
            pass
        else:
            continue

        go(url)
        if __DEBUG__: showforms()
        fv("1", "sequence", proteins[seqid]['seq'])
        submit()
        log_stderr("# TMBETA-NET: Predicting strands for %s - %s\n" \
                          % (seqid, proteins[seqid]['name']))
        out = show()
        time.sleep(1)

        if ("Some query is already running. Please try again." in out):
            log_stderr("# TMBETA-NET(web) error: %s" % (out))
            return {}

        # parse the web page returned, extract strand boundaries
        proteins[seqid]['tmbeta_strands'] = []
        for l in out.split('\n'):
            if __DEBUG__: log_stderr("## " + l)

            if "<BR>Segment " in l:
                i, j = l.split(":")[1].split("to")
                i = int(i.strip()[1:])
                j = int(j.strip()[1:])
                proteins[seqid]['tmbeta_strands'].append([i, j])

                if __DEBUG__:
                    log_stderr("# TMBETA-NET(web) segments: %s, %s" % (i, j))

        tmbeta_strands[seqid] = proteins[seqid]['tmbeta_strands']

    # we store the parsed strand boundaries in JSON format
    fh = open(outfile, 'w')
    fh.write(json.dumps(tmbeta_strands, separators=(',', ':\n')))
    fh.close()

    return tmbeta_strands
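
The strand parser above expects result lines shaped like "<BR>Segment 1: n3 to n9", where the [1:] slice drops a one-character residue prefix; this format is inferred from the parsing code itself, not from TMBETA-NET's documentation. A worked check of the slicing:

l = "<BR>Segment 1: n3 to n9"
i, j = l.split(":")[1].split("to")  # [' n3 ', ' n9']
i = int(i.strip()[1:])              # 'n3' -> 3
j = int(j.strip()[1:])              # 'n9' -> 9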
Example #16
def annotate(params, proteins, \
                   url="http://psfs.cbrc.jp/tmbeta-net/", \
                   category='OM(barrel)',
                   force=False):
  """
  Uses the TMBETA-NET web service (http://psfs.cbrc.jp/tmbeta-net/) to
  predict strands of outer membrane beta-barrels.
  
  By default, category='OM(barrel)' means prediction will only be run
  on proteins in the set with this category property. To process all
  proteins, change category to None.

  These keys are added to the proteins dictionary: 
    'tmbeta_strands' - a list of lists with paired start and end 
                       residues of each predicted strand. 
                       (eg [[3,9],[14,21], ..etc ])
  """

  # set the user-agent so web services can block us if they want ... :/
  python_version = sys.version.split()[0]
  agent("Python-urllib/%s (twill; inmembrane)" % python_version)
  
  outfile = 'tmbeta_net.out'
  log_stderr("# TMBETA-NET(web) %s > %s" % (params['fasta'], outfile))
  
  tmbeta_strands = {}
  if not force and os.path.isfile(outfile):
    log_stderr("# -> skipped: %s already exists" % outfile)
    fh = open(outfile, 'r')
    tmbeta_strands = json.loads(fh.read())
    fh.close()    
    for seqid in tmbeta_strands:
      proteins[seqid]['tmbeta_strands'] = tmbeta_strands[seqid]
      
    return tmbeta_strands

  # dump extraneous output into this blackhole so we don't see it
  if not __DEBUG__: twill.set_output(StringIO.StringIO())

  for seqid in proteins:
    
    # only run on sequences which match the category filter
    if force or \
       (category == None) or \
       (dict_get(proteins[seqid], 'category') == category):
      pass
    else:
      continue
      
    go(url)
    if __DEBUG__: showforms()
    fv("1","sequence",proteins[seqid]['seq'])
    submit()
    log_stderr("# TMBETA-NET: Predicting strands for %s - %s\n" \
                      % (seqid, proteins[seqid]['name']))
    out = show()
    time.sleep(1)

    if ("Some query is already running. Please try again." in out):
      log_stderr("# TMBETA-NET(web) error: %s" % (out))
      return {}

    # parse the web page returned, extract strand boundaries
    proteins[seqid]['tmbeta_strands'] = []
    for l in out.split('\n'):
      if __DEBUG__: log_stderr("## " + l)

      if "<BR>Segment " in l:
        i,j = l.split(":")[1].split("to")
        i = int(i.strip()[1:])
        j = int(j.strip()[1:])
        proteins[seqid]['tmbeta_strands'].append([i,j])

        if __DEBUG__: log_stderr("# TMBETA-NET(web) segments: %s, %s" % (i, j))

    tmbeta_strands[seqid] = proteins[seqid]['tmbeta_strands']

  # we store the parsed strand boundaries in JSON format
  fh = open(outfile, 'w')
  fh.write(json.dumps(tmbeta_strands, separators=(',',':\n')))
  fh.close()

  return tmbeta_strands
Example #17
def annotate(params, proteins, \
             force=False):
  """
  Uses the TMB-HUNT web service 
  (http://bmbpcu36.leeds.ac.uk/~andy/betaBarrel/AACompPred/aaTMB_Hunt.cgi) to
  predict if proteins are outer membrane beta-barrels.
  
  NOTE: In my limited testing, TMB-HUNT tends to perform very poorly in
        terms of false positives and false negatives. I'd suggest using only
        BOMP.
  """
  # TODO: automatically split large sets into multiple jobs
  #       TMB-HUNT will only take 10000 seqs at a time
  if len(proteins) >= 10000:
    log_stderr("# ERROR: TMB-HUNT(web): can't take more than 10,000 sequences.")
    return
  
  # set the user-agent so web services can block us if they want ... :/
  python_version = sys.version.split()[0]
  agent("Python-urllib/%s (twill; inmembrane)" % python_version)
  
  out = 'tmbhunt.out'
  log_stderr("# TMB-HUNT(web) %s > %s" % (params['fasta'], out))
  
  if not force and os.path.isfile(out):
    log_stderr("# -> skipped: %s already exists" % out)
    return parse_tmbhunt(proteins, out)
  
  # dump extraneous output into this blackhole so we don't see it
  if not __DEBUG__: twill.set_output(StringIO.StringIO())
  
  go("http://bmbpcu36.leeds.ac.uk/~andy/betaBarrel/AACompPred/aaTMB_Hunt.cgi")
  if __DEBUG__: showforms()

  # read up the FASTA format seqs
  fh = open(params['fasta'], 'r')
  fasta_seqs = fh.read()
  fh.close()
  
  # fill out the form
  fv("1", "sequences", fasta_seqs)

  submit()
  if __DEBUG__: showlinks()

  # small jobs will lead us straight to the results, big jobs
  # go via a 'waiting' page which we skip past if we get it
  job_id = None
  try:
    # we see this with big jobs
    result_table_url = follow("http://www.bioinformatics.leeds.ac.uk/~andy/betaBarrel/AACompPred/tmp/tmp_output.*.html")
    job_id = result_table_url.split('tmp_output')[-1:][0].split('.')[0]
  except:
    # small jobs take us straight to the html results table
    pass

  # parse the job_id from the url, since due to a bug in
  # TMB-HUNT the link on the results page from large jobs is wrong
  if not job_id: job_id = follow("Full results").split('/')[-1:][0].split('.')[0]
  log_stderr("# TMB-HUNT(web) job_id is: %s <http://www.bioinformatics.leeds.ac.uk/~andy/betaBarrel/AACompPred/tmp/tmp_output%s.html>" % (job_id, job_id))
  
  # polling until TMB-HUNT finishes
  # TMB-HUNT advises that 4000 sequences take ~10 mins
  # we poll a little faster than that
  polltime = (len(proteins)*0.1)+2
  while True:
    log_stderr("# TMB-HUNT(web): waiting another %i sec ..." % (polltime))
    time.sleep(polltime)
    try:
      go("http://bmbpcu36.leeds.ac.uk/~andy/betaBarrel/AACompPred/tmp/%s.txt" % (job_id))
      break
    except:
      polltime = polltime * 2
      
    if polltime >= 7200: # 2 hours
      log_stderr("# TMB-HUNT error: Taking too long.")
      return
    
  txt_out = show()
  
  # write raw TMB-HUNT results
  fh = open(out, 'w')
  fh.write(txt_out)
  fh.close()
  
  return parse_tmbhunt(proteins, out)
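
The job-id extraction in this example relies on twill's follow() matching its argument, as a regular expression, against the page's links and returning the resulting URL (as the assignments above assume). A worked version of the slicing, with a made-up job URL:

result_table_url = ("http://www.bioinformatics.leeds.ac.uk/"
                    "~andy/betaBarrel/AACompPred/tmp/tmp_output12345.html")
job_id = result_table_url.split('tmp_output')[-1:][0].split('.')[0]
# job_id == '12345'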