Example 1
def fetch_a_page():
    url = pagefetchqueue.get()
    if url:
        page = Page()
        page.url = url
        fetcher.fetch(page)
    return None
Example 2
def fetch_a_document():
    url = documentfetchqueue.get()
    if url:
        page = Page()
        page.url = url
        fetcher.fetch(page)
    return None
Example 3
    def get_page(self,arch,source_version,target_version,path):
        if path == '' or path == '/':
            return self.ChangeSet(arch,source_version,target_version)
        out = ''
        diffable = is_diffable(path)

        if not diffable:
            return 'Sorry, this file is not diffable'
        arches = self.request.get('arches')
        if arches:
            oarch,narch = (int(_) for _ in arches.split('|')[:2])
        else:
            oarch = version_arch(source_version)
            narch = version_arch(target_version)
        odata = fetcher.fetch(oarch,source_version,path)
        ndata = fetcher.fetch(narch,target_version,path)
        if odata is None:
            out += ('Sorry, could not fetch file %s for version %s.<br>' % \
                    (path,source_version))
        elif ndata is None:
            out += ('Sorry, could not fetch file %s for version %s.<br>' % \
                    (path,target_version))
        else:
            odata = odata.read()
            ndata = ndata.read()
            try:
                odata = odata.decode('utf8')
            except UnicodeDecodeError:
                odata = odata.decode('cp1251')

            try:
                ndata = ndata.decode('utf8')
            except UnicodeDecodeError:
                ndata = ndata.decode('cp1251')
            #return htmldiff.make_table(odata.splitlines(),ndata.splitlines())

            out += ('<div class="unified_diff">')
            out += ("<pre>")
            diff = unified_diff(odata.splitlines(),ndata.splitlines(),fromfile=source_version,tofile=target_version)
            prevdiv = ' '
            divs = { ' ' : '', '+' : 'add' , '-' : 'del' , '@' : 'linenum'}
            curdiv = ''
            for l in diff:
                line = escape(l)
                curdiv = line[0]

                if curdiv != prevdiv:
                    if prevdiv != ' ':
                        out += ('</div>')
                    if curdiv != ' ':
                        out += ('<div class="%s">' % divs[curdiv])
                out += line
                out += ("\n")
                prevdiv = curdiv
            if curdiv != ' ':
                out += ('</div>')
            out += ("\n</pre>")
            out += ("</div>")
        return out
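The method above folds two reusable patterns together: decoding with a cp1251 fallback and grouping difflib's unified_diff output into styled <div> blocks. A minimal standalone sketch of the diff-to-HTML part, using only the standard library (the function name is hypothetical; the divs mapping mirrors the one above):

from difflib import unified_diff
from html import escape

def render_diff_html(old_text, new_text, fromfile='old', tofile='new'):
    """Group unified-diff lines into <div>s keyed on their first character."""
    divs = {' ': '', '+': 'add', '-': 'del', '@': 'linenum'}
    out = ['<div class="unified_diff"><pre>']
    prevdiv = ' '
    for raw in unified_diff(old_text.splitlines(), new_text.splitlines(),
                            fromfile=fromfile, tofile=tofile):
        line = escape(raw)
        curdiv = line[0] if line else ' '
        if curdiv != prevdiv:
            # close the previous styled block, open a new one if needed
            if prevdiv != ' ':
                out.append('</div>')
            if curdiv != ' ':
                out.append('<div class="%s">' % divs.get(curdiv, ''))
        out.append(line + '\n')
        prevdiv = curdiv
    if prevdiv != ' ':
        out.append('</div>')
    out.append('</pre></div>')
    return ''.join(out)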
Example 4
def import_manifest(arch,version):
    v = Version.get_by_key_name(version)
    if v is None or not v.imported:
        m = fetcher.fetch(arch,version,'manifest.xml')
        if m is not None:
            m = Manifest(m)
            #xg_on = db.create_transaction_options(xg=True)
            v = version_ok(arch,version)
            prev = db.GqlQuery('select * from Version where imported = True and arch = :1', arch).fetch(1)
            if prev:
                prev = prev[0]
                from htmldiff import Changeset
                pmanifest = Manifest(fetcher.fetch(arch,prev.value,'manifest.xml'))
                changes = Changeset(pmanifest,m)
                to_delete = [ pmanifest.files[x] for x in changes.dels | changes.changes 
                        if pmanifest.files[x]['path'].endswith('entity') and pmanifest.files[x]['path'].startswith('game/resources0.s2z')]
                to_import = [ m.files[x] for x in changes.adds | changes.changes if 
                        m.files[x]['path'].endswith('entity') and m.files[x]['path'].startswith('game/resources0.s2z')]
                total = len(to_delete)
                current = 1
                del changes
                del m
                del pmanifest
                for file in to_delete:
                    e = Node.get_by_key_name('|'.join([file['version'],file['path']]))
                    if e is not None:
                        logging.info('[{1}/{2}] Deleting {0} entity group'.format('|'.join([file['version'],file['path']]),current,total))
                        db.run_in_transaction(delete_group,e)
                    current += 1
                del to_delete
            else:
                prev = None
                to_import = [x for x in m.files.values() if x['path'].endswith('entity') and x['path'].startswith('game/resources0.s2z')]
            
            total = len(to_import)
            current = 1
            for file in to_import:
                if file['path'].endswith('.entity'):
                    e = Node.get_by_key_name('|'.join([file['version'],file['path']]))
                    if e is None:
                        data = fetcher.fetch(arch,file['version'],file['path'])
                        #if data is None:
                            #continue
                        logging.info('[%d/%d] importing %s %s into db' % (current,total,file['version'],file['path']))
                        db.run_in_transaction(parse_entity,data,file['version'],file['path'],[version])
                        #db.run_in_transaction_options(xg_on,parse_entity,file['version'],file['path'],[version])
                    #elif version not in e.versions:
                        #db.run_in_transaction(set_version,e,version)
                current += 1
            v.imported = True
            v.put()
            if prev is not None:
                prev.imported = False
                prev.put()
Example 5
    def ChangeSet(self,arch,source_version,target_version):
        out = ''
        arches = self.request.get('arches')
        if arches:
            oarch,narch = (int(_) for _ in arches.split('|')[:2])
        else:
            oarch = version_arch(source_version)
            narch = version_arch(target_version)

        if oarch is None: oarch = arch
        if narch is None: narch = arch

        omanifest = fetcher.fetch(oarch,source_version,'manifest.xml')
        nmanifest = fetcher.fetch(narch,target_version,'manifest.xml')


        if omanifest is not None:
            #Version(value=source_version,key_name=source_version).put()
            version_ok(oarch,source_version)
        if nmanifest is not None:
            #Version(value=target_version,key_name=target_version).put()
            version_ok(narch,target_version)            
        if omanifest is None:
            out += ('Sorry, could not fetch manifest for %s' % source_version)
        elif nmanifest is None:
            out += ('Sorry, could not fetch manifest for %s' % target_version)
        else:
            omanifest = Manifest(omanifest)
            nmanifest = Manifest(nmanifest)

            changeset = Changeset(omanifest,nmanifest)

            changes = ( {'path' : f, 'old_version' : omanifest.files[f]['version'], \
                    'new_version' : nmanifest.files[f]['version'] } for f in changeset.changes )
            adds = ( {'path' : f,  \
                    'new_version' : nmanifest.files[f]['version'] } for f in changeset.adds )
            dels = ( {'path' : f, 'old_version' : omanifest.files[f]['version']\
                     } for f in changeset.dels )

            template_values = {
                    'source_version': source_version,
                    'target_version': target_version,
                    'changes': changes,
                    'adds' : adds,
                    'dels' : dels,
                    'base_url' : fetcher.get_base_url(arch),
                    'base_url2' : fetcher.get_base_url(arch, 1),
                    'arches' : '|'.join([str(_) for _ in [oarch,narch]]),
                    }
            template = templates.get_template('changeset.html')
            return template.render(template_values)
        self.response.out.write(out)
        return None
Example 6
def test_fetcher_sets_url(mock_requests):
    """Test that the fetcher correctly sets the podcast's url."""
    mock_requests.get(FEED_URL, text=read_test_file("canvas"))

    result = fetch(FEED_URL)

    assert result.url == FEED_URL
Example 7
def get(artist, song, album=None):
    """Fetch the lyrics as text."""
    info = artist, song, album or ''
    try:
        return database.load(*info)
    except LookupError:
        return fetcher.fetch(*info)
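Example 7's try/except is a read-through cache: serve from the local database when possible, fall back to the network fetcher on a miss. The same shape reduced to a dictionary cache (all names here are hypothetical, for illustration only):

_cache = {}

def get_cached(key, fetch_fn):
    # Read-through: return the cached value, or fetch and remember it on a miss.
    try:
        return _cache[key]
    except KeyError:
        value = _cache[key] = fetch_fn(key)
        return value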
Example 8
def extract(entrypoint):
    """Extract feed from entrypoint

    Entrypoint can be a full url or a domain name. When a domain name
    is given, it attempts to guess the homepage where it can extract
    the feed.

    """
    fetched_data = fetch(entrypoint)
    if fetched_data is None:
        return None
    else:
        urls, html = fetched_data
    feeds = extract_feeds(html, urls[-1])
    expanded_feeds = expand_feeds(feeds)
    results = filter_expanded_feeds(urls, expanded_feeds)

    # TODO loop toward domain for cases such as http://www.lesechos.fr/economie-politique/france/
    # TODO Scoring based on sublinks? -> http://www.lefigaro.fr

    if results:
        # Temporary behavior
        # on multiple results: select the first one
        # TODO Implement better scoring
        del results[0]['sublinks']
        results[0]['link'] = results[0]['link'][0]
        return results[0]

    return None
Example 10
def test_fetcher_returns_parsed_feed(mock_requests):
    """Test that the fetcher correctly fetches and parses a podcast feed."""
    mock_requests.get(FEED_URL, text=read_test_file("canvas"))

    result = fetch(FEED_URL)

    assert result.title == "Canvas"
Example 11
def main():
    fetcher.fetch()
    filenames = os.listdir(os.path.join(".", DIRECTORY))
    f2l_dict = dict()
    for filename in filenames:
        if filename.split(os.extsep)[-1] in ["htm", "html"]:
            # check the file extension
            print("Parsing file: " + filename)
            with open(os.path.join(".", DIRECTORY, filename), "r", encoding="utf-8") as f:
                parse_page(f.read(), f2l_dict)
        else:
            print("Wrong file type: " + filename)
    with open("output.txt", "w", encoding="utf-8") as out:
        out.write(str(f2l_dict))
    print("Processing complete!")
Example 12
def test_fetcher_sets_last_fetched(mock_requests):
    """Test that the fetcher correctly updates the podcast's last_fetched property."""
    mock_requests.get(FEED_URL, text=read_test_file("canvas"))

    now = datetime.datetime.now()
    with freezegun.freeze_time(now):
        result = fetch(FEED_URL)

        assert result.last_fetched == now
Example 13
def handle(job, *args, **kwargs):
    queue = kwargs['queue']
    task = json.loads(job)
    url = task["url"]
    status, source = fetcher.fetch(url, use_proxy=False)
    logger.info('%s|%s' % (url, status))
    try:
        _, source = encoding.html_to_unicode('', source)
    except Exception as e:
        print(e)
Example 14
def get_stringtables_entities(arch,version):
    try:
        stringtable = memcache.get('stringtable|entities|{0}'.format(version))
    except Exception:
        flush_all()
        stringtable = None
    if stringtable is not None:
        return stringtable
    stringtable = {}
    stringtable_version = Manifest(fetcher.fetch(arch,version,'manifest.xml')).files['game/resources0.s2z/stringtables/entities_en.str']['version']
    tabledata = fetcher.fetch(arch,stringtable_version,'game/resources0.s2z/stringtables/entities_en.str').read().decode('utf8')
    for line in tabledata.splitlines():
        m = re_entry.match(line)
        if m:
            stringtable[m.group(1)] = m.group(2).strip()
    try:
        memcache.set('stringtable|entities|{0}'.format(version),stringtable)
    except Exception:
        flush_all()
    return stringtable
Example 15
def set_user():
    connection = mdb.connect(host='localhost', user='******', passwd='', db='fbhack', use_unicode=True, charset='utf8')
    cursor = connection.cursor()

    data = (
        int(request.form['facebook_id']),
        request.form['first_name'],
        request.form['last_name'],
        request.form['username'],
        request.form['phone']
    )

    cursor.execute("INSERT INTO users (facebook_id, first_name, last_name, username, phone) VALUES(%s, %s, %s, %s, %s);", data)

    connection.commit()

    fetch()

    return jsonify(result="OK")
Example 16
	def run(self):
		item = self.queue.get()

		while item is not None:
			try:
				url = item['url']
				key = item['key']
				constraint = item['constraint']
				data = fetch(url)

				if data is None:
					self.logger.info('Not fetched: %s because type != text/html', url)
				else:
					links = get_all_links(data, base = url)
					feeds = get_all_feeds(data, base = url)
					interesting = collect(links)
	
					if interesting:
						self.collection_mutex.acquire()
						if key not in self.collection:
							self.collection[key] = {'feeds':{}}

						if feeds:
							for feed in feeds:
								self.collection[key]['feeds'][feed['href']] = feed['type']

						for service, accounts in interesting.items():
							if service not in self.collection[key]:
								self.collection[key][service]  = {}

							for a,u in accounts.items():
								self.collection[key][service][a] = {'url': u, 'depth':constraint.depth}
						self.collection_mutex.release()


					for l in links:
						new_constraint = constraint.inherit(url, l)
						if new_constraint == None:
							continue
	
						self.mutex.acquire()
						if l not in self.visited_urls:
							self.queue.put({'url':l, 'key':key, 'constraint': new_constraint})
							self.visited_urls.add(l)
						self.mutex.release()

			except HTTPError:
				self.logger.info('HTTPError exception on url: %s', url)

			self.queue.task_done()

			item = self.queue.get()

		self.queue.task_done() # task_done on None
Example 17
def process_wave(wave):
    """
    Gets the median intensities for a wavelength, and the file paths

    If no *good* data is found in first 6 hours of day at 15 minutes steps,
    then the value is replaced with NaN in the series.
    Good images are those that have a "quality" rating of 0

    At the end, all NaNs are filled with the last known value until then
    Unknown values at the beginning are filled from the next known value

    Args:
        wave (str): wave to process

    Returns:
        list containing the wave str, list of filenames, and intensities

    """
    paths = pd.Series(index=date_list, dtype=object)
    raw = pd.Series(index=date_list, dtype=float)
    for date in datetime_list:
        fles = fetch(date, date + timedelta(minutes=1), wave)
        missing_data = False
        while no_images(fles):
            date += timedelta(minutes=15)
            fles = fetch(date, date + timedelta(minutes=1), wave)
            if date.hour >= 6:
                missing_data = True
                break
        # print(date)
        if not missing_data:
            index = [str(date.date())]
            fle = fles[0]
            med_int = process_med_int(fle)
            paths.loc[index] = fle
            raw.loc[index] = med_int
    paths = paths.ffill()  # forward-fill gaps with the last known value
    paths = paths.bfill()  # then backward (if the initial dates lack data)
    raw = raw.ffill()
    raw = raw.bfill()
    return [wave, paths, raw]
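The trailing ffill/bfill pair is what makes the returned series gap-free: forward-fill carries the last good observation across missing days, and the backward-fill only matters when the series starts with gaps. A tiny self-contained illustration with synthetic data (not AIA imagery):

import numpy as np
import pandas as pd

dates = pd.date_range('2015-01-01', periods=6, freq='D')
raw = pd.Series([np.nan, np.nan, 1.0, np.nan, 2.0, np.nan], index=dates)

filled = raw.ffill().bfill()
# ffill -> [nan, nan, 1, 1, 2, 2]; bfill then repairs the leading gap
print(filled.tolist())  # [1.0, 1.0, 1.0, 1.0, 2.0, 2.0]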
Example 18
 def _load_url(self, url_u, encoding=None):
     # word hit list obsolete
     self.wordhitview.clear_words()
     # set text in textview
     ret = fetcher.fetch(url_u)
     if not encoding:
         encoding = decoder.detect_encoding(ret.txt_byte)
     txt_u = decoder.decode(ret.txt_byte, encoding)
     txt_u = unmarkup.unwiki(txt_u) or unmarkup.unhtml(txt_u)
     self.text = word.Text()
     self.text.set_from_txt_u(txt_u)
     self.textview.set_text(self.text, encoding, url_u)
Example 20
 def get_page(self,arch,version):
     manifest = fetcher.fetch(arch,version,'manifest.xml')
     if manifest is not None:
         version_ok(arch,version)
     else:
         self.response.out.write("Sorry, could not fetch manifest for {0} version".format(version))
         return
     manifest = Manifest(manifest)
     data = fetcher.fetch(arch,fetcher.normalize_ver(manifest.files['change_log_color.txt']['version']),'change_log_color.txt')
     if data is None: return
     data = data.read()
     try:
         data = data.decode('utf8')
     except UnicodeDecodeError:
         data = data.decode('cp1251')
     data = data.replace('\r\n','\n')
     data = hon2html(data)
     data = re.sub(ver_sub,r'\1[color=Yellow][SIZE=6][b]\2[/b][/size][/color]',data)
     data = re.sub(head_sub,r'\1[B]==[SIZE=4]\2[/size]==[/b]',data)
     data = re.sub(hr_sub,r'[hr][/hr]',data)
     smilies = fetch_honsmilies()
     data = re.sub(smilies[0],lambda m: '%s%s  [b]%s[/b]' % (m.group(1), smilies[1][m.group(2).lower()], m.group(2)),data)
     return ''.join(['<pre>',data,'</pre>'])
Example 21
    def get_page(self,arch,version,path,fpath):
        manifest = fetcher.fetch(arch,version,'manifest.xml')
        if manifest is not None:
            #Version(value=version,key_name=version).put()
            version_ok(arch,version)
        else:
            self.response.out.write("Sorry, could not fetch manifest for {0} version".format(version))
            return
        manifest = Manifest(manifest)
        path = path[1:]
        if fpath != '':
            print('file requested!')
        else:
            if path == '':
                nodes = manifest.files.keys()
            else:
                l = len(path)
                nodes = [f[l:] for f in manifest.files.keys() if f.startswith(path)]
            dirs = []
            files = []
            for x in nodes:
                n = x.split('/')
                if len(n) == 1:
                    x = path + x
                    f = { 'path' : n[0] , 'version' : fetcher.normalize_ver(manifest.files[x]['version']), 'size' : manifest.files[x]['size'] }
                    f['lang'] = get_lang(n[0])
                    f['fullpath'] = x
                    files.append(f)
                else:
                    dirs.append(n[0])
            dirs = frozenset(dirs)

            if path != '':
                up_url = '..'
            else:
                up_url = ''

            template_values = {
                    'version': version,
                    'path': path,
                    'dirs': sorted(list(dirs)),
                    'files' : files,
                    'up_url' : up_url,
                    'base_url' : fetcher.get_base_url(arch),
                    'base_url2' : fetcher.get_base_url(arch, 1),
                    }
            template = templates.get_template('folder.html')
            return template.render(template_values)
Example 22
def handler():
    if flask.request.method == "GET":
        return {"status": "ok"}, 200
    else:
        if flask.request.json is None:
            app.logger.error("POST req without req body received")
            return {"status": "error"}, 500
        else:
            try:
                ids = flask.request.json["ids"]
                tweets = fetcher.fetch(ids)
                app.logger.info("POST req with req body success")
                return {"status": "ok", "data": tweets}, 200
            except Exception:
                app.logger.error("POST req with req body failed")
                return {"status": "error"}, 500
Example 23
def url_handler(url_u, dir='/tmp/t'):
    if not os.path.isdir(dir):
        os.makedirs(dir)

    os.environ["ORIG_FILENAMES"] = "1"
    filename = os.path.join(dir, urlrewrite.url_to_filename(url_u)) + '.txt'

    ret = fetcher.fetch(url_u)
    txt_u = decoder.detect_decode(ret.txt_byte)
    txt_u = unmarkup.unwiki(txt_u)

    # add license notice
    tm = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())
    notice = u"\n\n%s\nRetrieved on %s from:\n  %s" % ('-'*78, tm, ret.url_u)
    notice += (u"\nLicensed under CC-BY-SA, see %s" %
               "http://creativecommons.org/licenses/by-sa/3.0/")
    txt_u += notice

    txt_byte = decoder.encode(txt_u)
    with open(filename, 'wb') as f:
        f.write(txt_byte)
Example 25
    def get_page(self, arch, version, path):
        lang = self.request.query_string
        if lang == "":
            lang = "text"
        if not is_diffable(path):
            return "Sorry, viewing this type of file is not allowed"
        data = fetcher.fetch(arch, version, path)
        if data is None:
            self.response.out.write("Sorry, could not fetch file %s for version %s.<br>" % (version, path))
            return None
        else:
            data = data.read()
            try:
                data = data.decode("utf8")
            except UnicodeDecodeError:
                data = data.decode("cp1251")

            template_values = {"data": pygmentize(lang, data)}

            template = get_template("highlight.html")
            return template.render(template_values)
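pygmentize(lang, data) is a project helper whose source is not shown here; a plausible implementation on top of the Pygments library (the helper name and its plain-text fallback are assumptions, not the project's actual code) might be:

from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import get_lexer_by_name
from pygments.util import ClassNotFound

def pygmentize(lang, data):
    # Hypothetical sketch: fall back to the plain-text lexer for unknown language names.
    try:
        lexer = get_lexer_by_name(lang)
    except ClassNotFound:
        lexer = get_lexer_by_name('text')
    return highlight(data, lexer, HtmlFormatter())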
Example 26
File: sync.py Project: ekevoo/nippl
	def _refresh(self, url_prefix, item, stamps_dict, available):
		try:
			known_item = item in stamps_dict
			stamp = None
			if known_item and os.access(self._path('current', item), os.R_OK):
				stamp = stamps_dict[item]
			resp = fetch(url_prefix + item, stamp)
			local_path = self._path('new', item)

			if resp.status >= 400: # error statuses
				if known_item:
					del stamps_dict[item]
				return known_item # changed if previously known

			elif resp.status == 304: # not modified
				available.add(item)
				os.link(
					self._path('current', item),
					self._path('new',     item))
				stamps_dict[item] = resp.date
				return False # unchanged

			elif 200 <= resp.status < 300: # downloading
				available.add(item)
				resp.save(local_path)
				stamps_dict[item] = resp.date
				return True # changed

			else:
				raise Exception("Don't know what to do with response %s" % resp.status)

		except Exception:
			import traceback
			logging.error('Failed to fetch %s%s. Skipping. Exception info:\n%s',
				url_prefix, item, traceback.format_exc())
			return False # assume unchanged
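_refresh implements a conditional-GET cache: the stored timestamp goes back to the server, 304 means the local copy is still current, and a 2xx status means a fresh download replaces it. A minimal sketch of the same protocol with only the standard library (function name hypothetical, error handling reduced to the essentials):

import urllib.request
from urllib.error import HTTPError

def fetch_if_modified(url, last_modified=None):
    """Return (status, body); body is None when the server answers 304."""
    req = urllib.request.Request(url)
    if last_modified:
        req.add_header('If-Modified-Since', last_modified)
    try:
        with urllib.request.urlopen(req) as resp:
            return resp.status, resp.read()
    except HTTPError as e:
        if e.code == 304:  # not modified: keep the cached copy
            return 304, None
        raise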
Example 27
def fetch_action():
    questions = fetcher.fetch(from_time=load_config()["last-sync"])
    classified_questions = get_classifier().classify(questions)
    relation_store.add_list_of_questions(classified_questions)
Example 28
def make_requests(requests):
    start = time.time()
    count = 0  # the original relied on an undefined `count`; tally completed responses instead
    for ok, resp in fetch(requests, concurrent=100):
        print(ok, resp)
        count += 1
    delta = time.time() - start
    print('%.02f req/s' % (count / delta))
Example 29
def test_save_start_time_to_config(config, current_time):
    with mock.patch("time.time", return_value=current_time):
        fetcher.fetch(sites=[], from_time=0)

    config.assert_called_with("last-sync", current_time)
Example 30
    def get_page(self,arch,version,hero):
        v = Version.get_by_key_name(version)
        if arch != fetcher.ARCHS.LINUX_RETAIL:
            return '<pre>Sorry, DB is disabled for RCT/SBT</pre>'
        elif v is None or not v.imported:
            versions = get_versions()
            versions.sort(key = lambda x: [int(y) for y in x.split('.')])
            if version == versions[-1]:
                self.response.out.write("Sorry, this version is not imported into db yet, importing was put into queue")
                taskqueue.add(url='/import',params={'version' : version,'arch' : arch},queue_name='importer')
            else:
                self.redirect('/heroes/latest/?' + self.request.query_string)
            return None
        else:
            if hero is None:
                manifest = fetcher.fetch(arch,version,'manifest.xml')
                manifest = Manifest(manifest)
                query = "Select * from Node where tag='hero'"
                q = db.GqlQuery(query)
                result = q.fetch(1000)
                result = [_ for _ in result if _.name not in ['wl_Warlock']]
                for hero in result:
                    if hasattr(hero,'attackprojectile') and hero.attackprojectile != '':
                        projectile = db.GqlQuery("Select * from Node where name = :1", hero.attackprojectile).fetch(1)[0]
                        if hasattr(projectile,'speed'):
                            hero.projectilespeed = projectile.speed
                        else:
                            hero.projectilespeed = '""'
                    else:
                        hero.projectilespeed = '""'

                    #get url for icon
                    icon = hero.icon.replace('.tga','.dds')
                    path = '/'.join(hero.key().name().split('|')[1].split('/')[:-1])
                    path = '/'.join([path,icon])
                    path = path.replace('game/resources0.s2z','game/textures.s2z/00000000')
                    if path in manifest.files:
                        path = '/'.join([manifest.files[path]['version'],path])
                    else:
                        logging.info("Failed to create url for hero icon :( :")
                        logging.info(icon)
                        logging.info(path)
                    hero.iconurl = path
                template_values = {}
                template_values['data'] = result
                template_values['stringtables'] = get_stringtables_entities(arch,version)
                template_name = self.request.get('template')
                if template_name and template_name == 'csv':
                    template = templates.get_template('heroes.csv')        
                else:
                    template = templates.get_template('heroes.html')        
                #self.response.out.write(template.render(template_values))
                #return None
                return template.render(template_values)
            else:
                hero = db.GqlQuery("Select * from Node where tag='hero' and name = :1",hero).fetch(1)
                if len(hero) == 0:
                    return 'Sorry, such hero is not found'
                hero = hero[0]
                #get url for icon
                manifest = fetcher.fetch(arch,version,'manifest.xml')
                manifest = Manifest(manifest)
                icon = hero.icon.replace('.tga','.dds')
                path = '/'.join(hero.key().name().split('|')[1].split('/')[:-1])
                path = '/'.join([path,icon])
                path = path.replace('game/resources0.s2z','game/textures.s2z/00000000')
                path = '/'.join([manifest.files[path]['version'],path])
                hero.iconurl = path
                abilities = db.GqlQuery("Select * from Node where tag='ability' and name in :1",[hero.inventory0,hero.inventory1,hero.inventory2,hero.inventory3]).fetch(10)
                for a in abilities:
                    icon = a.icon.replace('.tga','.dds')
                    path = '/'.join(a.key().name().split('|')[1].split('/')[:-1])
                    path = '/'.join([path,icon])
                    path = path.replace('game/resources0.s2z','game/textures.s2z/00000000')
                    path = '/'.join([manifest.files[path]['version'],path])
                    a.iconurl = path

                #abilities = dict([(a.name,a) for a in abilities])
                template_values = {}
                template_values['entity'] = hero
                template_values['version'] = version
                template_values['abilities'] = abilities
                template_values['stringtables'] = get_stringtables_entities(arch,version)
                template = templates.get_template('hero.html')        
                return template.render(template_values)
Example 31
def core(args):
    """Expand the user-supplied CSV path and hand it to fetcher.fetch."""
    csv = os.path.expanduser(args.csv_file)
    fetcher.fetch(csv)
Example 32
from fetcher import fetch
from generator import generate
from settings import CALENDAR_ID

generate(fetch(CALENDAR_ID))
Example 33
 def get_page(self,arch,version,path):
     dds = fetcher.fetch(arch,version,path)
     return dds2png(dds)
Example 35
def test_happy_path():
    url = 'http://www.google.com'
    data = fetcher.fetch(url)
    assert data
    assert len(data) > 0
    assert 'google' in data.lower()
Example 36
def test_invalid_feed(mock_requests):
    """Test that the fetcher raises an error if the url does not point to a valid rss feed."""
    mock_requests.get(FEED_URL, text="<html><body><h1>Not A Feed!</h1></body></html>")

    with pytest.raises(InvalidFeed):
        fetch(FEED_URL)
Example 38
def main():
    # Part where argparser figures out your command
    parser = argparse.ArgumentParser(
        description='Parse NCBI and then work with Biological data')

    # Argument for clearing out the storage folder (Mostly for testing purposes)
    parser.add_argument('-d',
                        '--delete',
                        dest='delete',
                        default=False,
                        action='store_true',
                        help="delete current storage")

    # Argument for fetching.
    parser.add_argument('-f',
                        '--fetch',
                        dest='fetch',
                        default="",
                        help='Fetches from ncbi and adds to storage: \n '
                        'Usage: -f [Accession number or boolean operators]')

    parser.add_argument(
        '-i',
        '--index',
        dest='index',
        action='store_true',
        help=
        'Resets the indexes. This can be done manually with this flag or configured'
        ' to run every time via the config file.')

    parser.add_argument(
        '-m',
        '--mafft',
        dest='mafft',
        default=False,
        action='store_true',
        help=
        "Runs mafft when pulling. Optional alignment but requires -p or --pull to be effective. "
        "Can also be specified to run automatically in config")

    parser.add_argument(
        '-p',
        '--pull',
        dest='pull',
        default=False,
        action='store_true',
        help="Pull from storage. "
        "The genes and species specified are specified in genes.lst and species.lst."
    )

    parser.add_argument(
        '-s',
        '--setup',
        dest='setup_structure',
        default="",
        help="Usage: -s [storage location]" + "\n"
        " Sets up a default structure for storage and indexes."
        "This should be done when moving storage to a location "
        "outside of the cloned folder.")

    # This stores all of the values from the parser
    args = parser.parse_args()

    delete = args.delete

    query_fetch = args.fetch
    index = args.index

    mafft_args = args.mafft
    pull = args.pull
    setup_structure = args.setup_structure

    # Testing output
    output = "Output: \n"

    # This is the part where we are reading from the config
    config = configparser.ConfigParser()
    config.read('ncbifetcher.config')

    email = config['OPTIONS']['email']
    location_index = config['INDEX']['index_location']
    location_storage = config['STORAGE']['storage_location']
    location_output = config['OUTPUT']['output_location']

    reset_indexes_default = config['OPTIONS']['reset_indexes_everytime']
    run_mafft_config = config['OPTIONS']['run_mafft_everytime']

    # Testing: Deletes everything in the folders and resets the indexes
    if delete:
        print("deleting... \n")
        delete_folder_contents(location_storage)
        delete_folder_contents(location_output)

        # Optional resetting indexes
        if reset_indexes_default in ("1", "true", "True"):  # configparser values are strings
            reset_indexes(location_storage, location_index)
        return

    # Fetches from genbank
    if len(query_fetch) >= 1:
        # If the input is a file, fetches all from the file
        if os.path.isfile(query_fetch):
            print("Fetching from file: ", query_fetch)
            accession_numbers_from_file = []
            with open(query_fetch, "r") as lines:
                for line in lines:  # Gets every possible entry from file
                    accession_numbers_from_file.append(line.strip())

            accession_numbers_from_file = ','.join(accession_numbers_from_file)

            # Fetches based on the accession numbers
            fetch(accession_numbers_from_file, location_storage, email)

        else:  # Fetches the single query
            print("Fetching...")
            fetch(query_fetch, location_storage, email)

        # Optional resetting indexes
        if reset_indexes_default in ("1", "true", "True"):
            reset_indexes(location_storage, location_index)
        return

    # This is a way to sort the indexes
    if index:
        print("Resetting indexes...")
        output += "Index: \n"

        reset_indexes(location_storage, location_index)
        return

    # Pulling from storage - Default set to wherever index says to go
    if pull:
        print("Pulling...")

        pull_query_to_fasta(location_output,
                            location_index,
                            location_storage,
                            run_mafft=mafft_args
                            or run_mafft_config in ("1", "true", "True"))
        return

    # For setting up a file structure at a location other than default
    if len(setup_structure) >= 1:
        print("Setting up structure at " + setup_structure + "...")
        ensure_folder_scheme_storage(setup_structure)
        return
Example 39
        'nid': nid,
        'pid': pid,
        'cover': cover,
        'playlistId': playlistId,
        'o_playlistId': o_playlistId,
        'cid': cid,
        'subcid': subcid,
        'osubcid': osubcid,
        'category': category,
        'cateCode': cateCode,
        'pianhua': pianhua,
        'tag': tag,
        'tvid': tvid,
        'title': title,
        'last': last,
        'brief': brief
    }
    return item

if __name__ == '__main__':
    import fetcher
    url = 'http://tv.sohu.com'
    url = 'http://tv.sohu.com/20131223/n392267093.shtml'
    url = 'http://tv.sohu.com/20131223/n392267093.shtml'
    status, content = fetcher.fetch(url)
    _, ucontent = encoding.html_to_unicode('', content)
    #print(extract_links(url, ucontent))
    #print(extract_content(url, ucontent))
    #print(extract_sohutv(url, ucontent))
    print(extract_sohutv_data_by_regex(url, ucontent))
Example 40
            print(f, w.get().encode('utf-8'), end=' ')
            for h in w.get_hits():
                print(h.get_pos(), end=' ')
            print()

    import sys
    text = Text()
    text.set_from_file(sys.argv[1])
    text.do_index()
    data = text.by_freq()
    print_by_freq(data)

    import fetcher
    url_u = u'http://www.dagbladet.no'
    text = Text()
    ret = fetcher.fetch(url_u)
    encoding = decoder.detect_encoding(ret.txt_byte)
    text.set_from_txt_byte(ret.txt_byte, encoding, untag=True)
    text.do_index()
    data = text.by_freq()
    print_by_freq(data)
    print(encoding)

    sys.exit()

    def out(dct, f):
        ws = dct.keys()
        ws = sorted(ws, key=str.lower)
        s = ''
        for w in ws:
            s += '%-6.6s  %s\n' % (dct[w].len_hits(), w)
Example 41
    filter_mediawiki = mediawiki.MediawikiFilter()
    txt_u = filter_mediawiki.get_wiki_body(txt_u)
    return txt_u

def unwiki(txt_u):
    filter_mediawiki = mediawiki.MediawikiFilter()
    filter_html = html.HtmlFilter()
    txt_u = filter_mediawiki.get_wiki_body(txt_u)
    txt_u = filter_html.resolve_specialchars(txt_u)
    txt_u = filter_mediawiki.unmarkup(txt_u)
    txt_u = filter_html.unmarkup(txt_u)
    return txt_u


if __name__ == "__main__":
    import sys

    import decoder
    import fetcher
    ret = fetcher.fetch('http://en.wikipedia.org/w/index.php?title=Linguistics&action=edit')
    txt_u = decoder.detect_decode(ret.txt_byte)
    txt_u = unwiki(txt_u) or unhtml(txt_u)
    print(decoder.encode(txt_u))
    sys.exit()

    txt_byte = open(sys.argv[1]).read()
    txt_u = decoder.detect_decode(txt_byte)
    txt_u = unwiki(txt_u) or unhtml(txt_u)
    print(decoder.encode(txt_u))
    sys.exit()