Example #1
def main():
    connect = client.rtm_connect()
    if not connect:
        print('Slack RTM Connect Error!')
        return
    print('Slack RTM Connect Success!')
    while True:
        for data in client.rtm_read():
            if data['type'] == 'message':
                if 'bot_id' not in data:
                    parse(data['text'])
        time.sleep(0.1)
Example #2
    def __init__(self, url, key=None, secret=None, expiration_days=0, private=False, content_type=None, create=True):
        from boto.s3.connection import S3Connection
        from boto.s3.key import Key

        self.url = parse(url)
        self.expiration_days = expiration_days
        self.buffer = StringIO()

        self.private = private
        self.closed = False
        self._readreq = True
        self._writereq = False
        self.content_type = content_type or mimetypes.guess_type(self.url.path)[0]

        bucket = self.url.netloc
        if bucket.endswith(".s3.amazonaws.com"):
            bucket = bucket[:-17]

        self.client = S3Connection(key, secret)

        self.name = "s3://" + bucket + self.url.path

        if create:
            self.bucket = self.client.create_bucket(bucket)
        else:
            self.bucket = self.client.get_bucket(bucket, validate=False)

        self.key = Key(self.bucket)
        self.key.key = self.url.path.lstrip("/")
        self.buffer.truncate(0)
def is_open(dpmt, course, crn):
     base = "http://my.illinois.edu"
     page = blogotubes('http://www.courses.illinois.edu')
     if not page:
          print(page); return -1
     url = geturl(page, 'Class Schedule')
     if not url: 
          print(url); return -1
     page = blogotubes(base+url)
     if not page:
          print('lol', page); return -1
     url = geturl(page, dpmt)
     if not url:
          print(url); return -1
     page = blogotubes(base+url) # Get list of courses in dpmt
     if not page:
          print(page); return -1
     url = geturl(page, course)
     if not url:
          print(url); return -1
     page = blogotubes(base+url) # Get list of sections in course
     if not page:
          print(page); return -1
     result = parse(page, crn) # Parse openness of section
     if result:
          return 1
     else:
          return 0
Example #4
def migrate(path, name):
    print('----%s----' % name)

    input_f = open(path, 'r', encoding='utf-8')

    quotes = []

    prev = ''
    for line in input_f.readlines():
        text, page = parse(line, prev)

        if len(page) > 0:
            verifyPage(page, line)

            pair = dict()
            pair['text'] = text.lstrip()
            pair['page'] = page
            quotes += [pair, ]
            prev = ''
        else:
            prev = text

    input_f.close()

    if len(prev):
        # leftover text with no page number becomes its own quote
        quotes.append({'text': prev.lstrip(), 'page': 0})


    book = {
        'title': name,
        'quotes': quotes
    }

    return book
Example #5
	def detect_redirect(self):
		parse = urllib.parse.urlparse

		# the original url
		org_url = self.url_data

		# get an opener doing redirections 
		try:
			opener = self._create_fetcher(redirect_handler=False)
			response = opener.open(self.url)
		except:
			raise UnknownHostName(self.url)	

		# the new url
		new_url = parse(response.geturl())

		# detect a redirection
		new_loc = new_url.scheme + '://' + new_url.netloc
		org_loc = org_url.scheme + '://' + org_url.netloc

		self.is_redirected = not(new_loc == org_loc)

		if self.is_redirected:
			self.printer.print_debug_line('%s redirects to %s' % (org_loc, new_loc),2)
		else:
			self.printer.print_debug_line('%s does not redirect' % (org_loc, ), 2)

		# create a response object and add it to the cache
		R = _create_response(response)
		self.cache[new_loc] = R
		self.cache[self.url] = R

		return (self.is_redirected, new_loc)
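
The redirect check above compares only scheme and network location, so a redirect that stays on the same host is not reported. A minimal, self-contained sketch of that comparison (the URLs below are made-up examples, not part of the original class):

from urllib.parse import urlparse

def same_location(original_url, final_url):
    # True when both URLs share scheme and netloc, i.e. no cross-host redirect
    org, new = urlparse(original_url), urlparse(final_url)
    return (org.scheme, org.netloc) == (new.scheme, new.netloc)

print(same_location("http://example.com/a", "https://example.com/b"))   # False (scheme changed)
print(same_location("https://example.com/a", "https://example.com/b"))  # True (same location)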
 def process_response(self, r):
     def parse(item, type):
         text = item.xpath('.//td[3]/text()')[0].strip()
         context = item.xpath('.//td[@class="codeContext"]/text()')
         where = item.xpath('.//td[@class="linenumber"]/text()')[0]
         return {
             'type': type,
             'text': text,
             'context': context[0] if context else '',
             'where': where
         }
     doc = html.document_fromstring(r)
     return chain((parse(item, 'Error') 
                   for item in doc.xpath('//div[@id="errors"]//tr')),
                  (parse(item, 'Warning') 
                   for item in doc.xpath('//div[@id="warnings"]//tr')))
Example #7
def determine_course_status(subject_name, course_name, term):
  pages = {}

  # PeopleSoft stores all pages in an iframe whose src is continually modified.
  # Fetch the page inside this iframe.
  pages['container'] = fetch('https://prdrps2.ehs.ucalgary.ca/psauthent/class-search/public')
  target_content = parse('[name=TargetContent]', pages['container'])[0]
  search_form_url = urllib.parse.unquote(target_content['src'])

  # Fetch class search form.
  pages['course_search'] = fetch(search_form_url)

  course_search_url = 'https://prdrps2.ehs.ucalgary.ca/psc/saprd/' + \
                     'EMPLOYEE/HRMS/c/COMMUNITY_ACCESS.CLASS_SEARCH.GBL'

  # Fetch initial set of search results.
  pages['search_results_partial'] = fetch_initial_search_results(
    course_search_url, term, subject_name, course_name, pages['course_search']
  )

  # Fetch full set of search results.
  # TODO: for classes where all results are on first page (i.e., those with <=
  # 3 sections), do not perform this query, but instead simply use
  # search_results_partial.
  pages['search_results_full'] = fetch_full_search_results(
    course_search_url, pages['search_results_partial']
  )

  return pages['search_results_full']
Example #8
def fetch_full_search_results(course_search_url, partial_search_results):
  params = {
    'ICAJAX': '1',
    'ICType': 'Panel',
    'ICElementNum': '0',
    'ICStateNum': '57',
    'ICAction': '$ICField106$hviewall$0',
    'ICXPos': '0',
    'ICYPos': '0',
    'ICFocus': '',
    'ICSaveWarningFilter': '0',
    'ICChanged': '-1',
    'ICResubmit': '0',
    'ICModalWidget': '0',
    'ICZoomGrid': '0',
    'ICZoomGridRt': '0',
    'ICModalLongClosed': '',
    'ICActionPrompt': 'false',
    'ICFind': '',
    'ICAddCount': '',
  }

  dynamic_keys = ('ICSID', 'ICStateNum')
  dynamic_params = {}
  for key in dynamic_keys:
    dynamic_params[key] = parse('form[name=win0] input[name=%s]' % key,
      partial_search_results)[0]['value']
  params.update(dynamic_params)

  return fetch(course_search_url, params)
Example #9
    def hosted_results(self, session_host=None, session_name=None,
                       date_range=None, sort=ct.SORT_TIME,
                       order=ct.ORDER_DESC, page=1):
        """ Search hosted race results using various fields. Returns a tuple
            (results, total_results), so to get all results you should
            request successive pages (using page) until you have gathered
            total_results entries. Each page has 25 (ct.NUM_ENTRIES) results max."""

        lowerbound = ct.NUM_ENTRIES * (page - 1) + 1
        upperbound = lowerbound + ct.NUM_ENTRIES - 1

        data = {'sort': sort, 'order': order, 'lowerbound': lowerbound,
                'upperbound': upperbound}
        if session_host is not None:
            data['sessionhost'] = session_host
        if session_name is not None:
            data['sessionname'] = session_name

        if date_range is not None:
            # Date range
            tc = lambda s:\
                time.mktime(datetime.datetime.strptime(s, "%Y-%m-%d").
                            timetuple()) * 1000
            data['starttime_lowerbound'] = tc(date_range[0])
            # multiplied by 1000
            data['starttime_upperbound'] = tc(date_range[1])

        r = self.__req(ct.URL_HOSTED_RESULTS, data=data)
        # tofile(r)
        res = parse(r)
        total_results = res['rowcount']
        results = res['rows']  # doesn't need format_results
        return results, total_results
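
As the docstring notes, each call returns at most ct.NUM_ENTRIES (25) rows, so collecting everything means walking the page parameter until total_results is reached. A hedged sketch of such a caller; the client object and keyword filters are assumptions for illustration:

def all_hosted_results(client, **filters):
    # Keep requesting pages until every row promised by total_results is collected.
    collected, page = [], 1
    while True:
        results, total = client.hosted_results(page=page, **filters)
        collected.extend(results)
        if not results or len(collected) >= total:
            return collected
        page += 1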
Example #10
    def decrypt(self, request, sessionid):
        """ Avoid showing plain sessionids
            Optionally require that a referer exists and matches the
            whitelist, or reset the session
        """
        if not sessionid:
            return ""

        # (nonce, sessionid) = sessionid.split(":", 1)
        # sessionid = self.xor(nonce, sessionid.decode("base64"))

        secret = self._secret(request)
        if self.settings.get("HOSTS", []):
            referer = request.META.get("HTTP_REFERER", "None")
            if referer == "None":
                # End session unless a referer is passed
                return ""
            url = parse(referer)
            if url.hostname not in self.settings["HOSTS"]:
                err = "%s is unauthorised" % url.hostname
                raise Exception(err)
        cipher = Fernet(secret)
        session_key = cipher.decrypt(sessionid)
        try:
            return str(session_key, "utf8")
        except:
            return ""
Example #11
def convert(data, field):
    if isinstance(data, Literal):
        data = data.value

    if isinstance(data, URIRef):
        return str(data)

    if isinstance(field, IndexedLanguageField):
        lng = {}
        for d in data:
            lang = d.language
            if not lang:
                lang = 'null'
            lng[lang] = str(d)
        return lng

    if isinstance(data, list):
        return [x for x in [convert(x, field) for x in data] if x]

    elif isinstance(field, IndexedDateTimeField):
        if data is None:
            return None
        if isinstance(data, str):
            data = parse(data)
        return data.strftime('%Y-%m-%dT%H:%M:%S')

    elif data and isinstance(data, FedoraObject):
        return data.id

    return data
Example #12
def list_pages(namespace_url=None):
    list_url = namespace_url or INDEX_INDEX
    print('Crawling {}'.format(list_url))
    tree = parse(list_url)

    for a in tree.xpath('//a[@class="twikilink"]'):
        name = a.text.strip()
        url = a.attrib['href']
        if namespace_url:
            yield (name,), url
        else:
            yield ('Main', name), url

    if not namespace_url:
        namespaces = tree.xpath(
            '//a[starts-with(@href, "index_report.php?groupname=")]'
        )

        for a in namespaces:
            namespace = a.text.strip()
            url = urllib.parse.urljoin(
                INDEX_INDEX, a.attrib['href']
            )
            for key, value in list_pages(url):
                assert len(key) == 1
                yield (namespace,) + key, value
Example #13
def parseParms(xfile):
    if debugMode():
        print("parseParms:",xfile)

    pdict = {}

    try:
        statxml = os.stat(xfile)
        
    except:
        print("Error, file",xfile,"not found")
        return None
    
    try:
        t = parse(xfile)       
    except:
        print("Error,could not parse",xfile)
        return None

    root = t.getroot()
    kids = list(root)
    for k in kids:
        pdict[k.tag] = k.text

    return pdict
Example #14
    def iratingchart(self, custid=None, category=ct.IRATING_ROAD_CHART):
        """ Gets the irating data of a driver using its custom id (custid) 
            that generates the chart located in the driver's profile. """

        r = self.__req(ct.URL_STATS_CHART % (custid, category),
                       cookie=self.last_cookie)
        return parse(r)
	def get_article(self, candidates, best_candidate):
		# Now that we have the top candidate, look through its siblings for content that might also be related.
		# Things like preambles, content split by ads that we removed, etc.

		sibling_score_threshold = max([10, best_candidate['content_score'] * 0.2])
		output = parse("<div/>")
		for sibling in best_candidate['elem'].parent.contents:
			if isinstance(sibling, NavigableString): continue
			append = False
			if sibling is best_candidate['elem']:
				append = True
			sibling_key = HashableElement(sibling)
			if sibling_key in candidates and candidates[sibling_key]['content_score'] >= sibling_score_threshold:
				append = True

			if sibling.name == "p":
				link_density = self.get_link_density(sibling)
				node_content = sibling.string or ""
				node_length = len(node_content)

				if node_length > 80 and link_density < 0.25:
					append = True
				elif node_length < 80 and link_density == 0 and re.search(r'\.( |$)', node_content):
					append = True

			if append:
				output.append(sibling)

		if not output: output.append(best_candidate)
		return output
Example #16
    def __check_cookie(self):
        """ Checks the cookie by testing a request response"""

        r = parse(self.__req(ct.URL_DRIVER_COUNTS, cookie=self.last_cookie))
        if isinstance(r, dict):
            return True
        return False
Example #17
def load_url(parse, url, max_requests = 1, timeout = 60) :

    requests = []

    for i in range( max_requests ) :

        req = {
            'url' : url ,
            'date' : arrow.now(TZ).format( TIMEFMT )
        }

        requests.append( req )

        try:

            with urllib.request.urlopen(url, timeout=timeout) as conn:

                req['code'] = conn.getcode()

                now = arrow.now(TZ)

                data = parse(conn)

                return LoadUrlResult( data , now , requests )

        except urllib.error.HTTPError as e :

            req['code'] = e.code

    now = arrow.now(TZ)
    raise LoadUrlException( now , requests )
def main():
  ''' download file and return it as string '''
  cj = http.cookiejar.CookieJar()
  opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
  urllib.request.install_opener(opener)

  inputhtml= urllib.request.urlopen(URL1).readlines()

  print(cj)
  imgdata = parse(inputhtml)
  writedata('img.png', imgdata)
  
  ocrfix()
  
  password = ocrdecode()
  print (password)
 
  postdata = post_data(password)
  print(postdata)

  responsehtml= urllib.request.urlopen(URL1, postdata).readlines()

  resultlines = list(map(lambda x: x.decode("utf-8"), responsehtml))
  for r in resultlines:
    print(r)
    def onSend(self):

        self.getFields()

        message = self.generateMessage().toLatin1()
        subject = self.subject_.toLatin1()

        params = urllib.parse.urlencode(
            {
                "kontakt": "cad",
                "from_name": self.name_,
                "from_mail": self.email_,
                "subject": subject,
                "kommentar": message,
            }
        )

        headers = {"Content-type": "application/x-www-form-urlencoded", "Accept": "text/plain"}

        conn = http.client.HTTPConnection("www.ipek.uni-karlsruhe.de:80")

        conn.request("POST", "/cms/de/kontakt/kontakt.php", params, headers)

        response = conn.getresponse()

        print(response.status, response.reason)

        data = response.read()

        conn.close()

        self.close()

        return
def inputMove():
    moves = []
    mc.events.clearAll()
    while len(moves) < 2:
        try:
            chats = mc.events.pollChatPosts()
            move = parse(chats[0].message)
            for m in moves:
                drawSquare(m[0],m[1])
            return move
        except:
            pass
        hits = mc.events.pollBlockHits()
        if len(hits) > 0:
            c = hits[0].pos
            if ( corner.x <= c.x and corner.y -1 <= c.y and corner.z <= c.z and
                 c.x < corner.x + 64 and c.y < corner.y + MAXHEIGHT and c.z < corner.z + 64 ):
                m = (c.x - corner.x) / 8, (c.z - corner.z) /8
                if len(moves) == 0 or m[0] != moves[0][0] or m[1] != moves[0][1]:
                    highlightSquare(m[0],m[1])
                    moves.append(m)
                    time.sleep(0.2)
                    mc.events.clearAll() # debounce
                    continue
            for m in moves:
                drawSquare(m[0],m[1])
            moves = []
            mc.postToChat('Canceled. Enter another move.')
            time.sleep(0.2)
            mc.events.clearAll() # debounce
        time.sleep(0.2)
    for m in moves:
        drawSquare(m[0],m[1])
    return tuple(moves)
def append_next_page(parsed_urls, page_index, page_url, doc, options):
    logging.debug("appending next page: %s" % page_url)
    fetcher = options["urlfetch"]
    html = fetcher.urlread(page_url)
    orig_page_doc = parse(html, page_url)
    next_page_url = find_next_page_url(parsed_urls, page_url, orig_page_doc)
    page_article = get_article(orig_page_doc, options)
    log.debug("Appending " + str(page_article))

    if page_article.html:
        page_doc = fragment_fromstring(page_article.html)
        make_page_elem(page_index, page_doc)

        if not is_suspected_duplicate(doc, page_doc):
            # page_doc is a singular element containing the page article elements.  We
            # want to add its children to the main article document to which we are
            # appending a page.
            if doc.tag == "html":
                children = doc.getchildren()
                if children[0].tag == "head":
                    for elem in page_doc:
                        doc.getchildren()[1].append(elem)
                else:
                    for elem in page_doc:
                        doc.getchildren()[0].append(elem)
            else:
                for elem in page_doc:
                    doc.append(elem)
            doc.append(page_doc)
            if next_page_url is not None:
                append_next_page(parsed_urls, page_index + 1, next_page_url, doc, options)
Example #22
 def __init__(self, content, name=None, namespace=None):
     self.content = content
     self._unicode = isinstance(content, str)
     self.name = name
     self._parsed = parse(content, name=name)
     if namespace is None:
         namespace = {}
     self.namespace = namespace
Example #23
    def driverdata(self, drivername):
        """ Personal data of driver  using its name in the request 
            (i.e drivername="Victor Beltran"). """

        r = self.__req(ct.URL_DRIVER_STATUS % (encode({
            'searchTerms': drivername})), cookie=self.last_cookie)
        # tofile(r)
        return parse(r)
 def _convertXML(self):
     """
     Convert an XML result into a Python dom tree. This method can be overwritten in a
     subclass for a different conversion method.
     @return: converted result
     @rtype: PyXlib DOM node
     """
     from xml.dom.minidom import parse
     return parse(self.response)
Example #25
    def driver_search(self, race_type=ct.RACE_TYPE_ROAD, location=ct.LOC_ALL,
                      license=(ct.LIC_ROOKIE, ct.ALL), irating=(0, ct.ALL),
                      ttrating=(0, ct.ALL), avg_start=(0, ct.ALL),
                      avg_finish=(0, ct.ALL), avg_points=(0, ct.ALL),
                      avg_incs=(0, ct.ALL), active=False,
                      sort=ct.SORT_IRATING, page=1, order=ct.ORDER_DESC):
        """Search drivers using several search fields. A tuple represent a 
           range (i.e irating=(1000, 2000) gets drivers with irating 
           between 1000 and 2000). Use ct.ALL used in the lower or 
           upperbound of a range disables that limit. Returns a tuple 
           (results, total_results) so if you want all results you should 
           request different pages (using page) until you gather all
           total_results. Each page has 25 (ct.NUM_ENTRIES) results max."""

        lowerbound = ct.NUM_ENTRIES * (page - 1) + 1
        upperbound = lowerbound + ct.NUM_ENTRIES - 1
        search = 'null'
        friend = ct.ALL  # TODO
        studied = ct.ALL  # TODO
        recent = ct.ALL  # TODO

        active = int(active)
        # Data to POST
        data = {'custid': self.custid, 'search': search, 'friend': friend,
                'watched': studied, 'country': location, 'recent': recent,
                'category': race_type, 'classlow': license[0],
                'classhigh': license[1], 'iratinglow': irating[0],
                'iratinghigh': irating[1], 'ttratinglow': ttrating[0],
                'ttratinghigh': ttrating[1], 'avgstartlow': avg_start[0],
                'avgstarthigh': avg_start[1], 'avgfinishlow': avg_finish[0],
                'avgfinishhigh': avg_finish[1], 'avgpointslow': avg_points[0],
                'avgpointshigh': avg_points[1], 'avgincidentslow':
                avg_incs[0], 'avgincidentshigh': avg_incs[1],
                'lowerbound': lowerbound, 'upperbound': upperbound,
                'sort': sort, 'order': order, 'active': active}

        total_results, drivers = 0, {}

        try:
            r = self.__req(ct.URL_DRIVER_STATS, data=data,
                           cookie=self.last_cookie)
            res = parse(r)
            total_results = res['d']['32']

            header = res['m']
            f = res['d']['r'][0]
            if int(f['29']) == int(self.custid):  # 29 is custid
                drivers = res['d']['r'][1:]
            else:
                drivers = res['d']['r']
            drivers = format_results(drivers, header)

        except Exception as e:
            pprint(("Error fetching driver search data. Error:", e),
                   self.verbose)

        return drivers, total_results
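
A hypothetical call illustrating the (low, high) range tuples and ct.ALL semantics described in the docstring; the client and ct arguments are assumed to stand in for the surrounding class instance and its constants module:

def road_drivers_with_irating(client, ct, low=2000, high=3000):
    # Range filters are (low, high) tuples; ct.ALL in a bound disables that limit.
    drivers, total = client.driver_search(
        race_type=ct.RACE_TYPE_ROAD,
        irating=(low, high),
        license=(ct.LIC_ROOKIE, ct.ALL),  # any licence class from Rookie up
        page=1,
    )
    return drivers, total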
Example #26
def findDate(inputValue):
    try:
        DateTimeString = str(parse(inputValue, ignoretz=True))
    except:
        #if not found, epoch = 0
        DateTimeString = "1970-01-01 00:00:00"
     
    # Convert to epoch date time and return the value   
    return toEpoch(DateTimeString)
    def _summary(self, enclose_with_html_tag=True):
        # the first page parsed into a elementree element
        doc = self.html

        # the set of urls we've processed so far
        parsed_urls = set()
        url = self.options.get("url", None)
        if url is not None:
            parsed_urls.add(url)

        # check the current doc for a next page if requested
        if self.options.get("multipage", False):
            next_page_url = find_next_page_url(parsed_urls, url, doc)

            page_0 = get_article(doc, self.options)
            page_0_doc = fragment_fromstring(page_0.html)
            page_index = 0
            make_page_elem(page_index, page_0_doc)

            if enclose_with_html_tag:
                output = document_fromstring("<div/>")
                output.getchildren()[0].attrib["id"] = "article"
                output.getchildren()[0].append(page_0_doc)
            else:
                output = fragment_fromstring("<div/>")
                output.attrib["id"] = "article"
                output.append(page_0_doc)

            if next_page_url is not None:
                append_next_page(parsed_urls, page_index + 1, next_page_url, output, self.options)
            return Summary(
                tostring(output),
                page_0.confidence,
                short_title=shorten_title(output),
                title=get_title(output),
                description=get_description(output),
                keywords=get_keywords(output),
            )

        summary = get_article(doc, self.options, enclose_with_html_tag=enclose_with_html_tag)
        print(len(summary.html), "============================")
        if summary.title == "[something-wrong]" or len(summary.html) < 500:
            output = parse(self.input_doc, self.options.get("url"))
            remove_unlikely_candidates(output)
            o = open("something-wrong.txt", "w")
            print("[something-wrong]", tostring(output), file=o)
            return Summary(
                get_clean_html(output),
                0,
                short_title=shorten_title(output),
                title=get_title(output),
                description=get_description(output),
                keywords=get_keywords(output),
            )
        else:
            return summary
Example #28
    def get_result_dicts(cls, data, parser, mm_key=None, onlyif=None):
        if not hasattr(parser, "items"):
            parser = {"key": parser}

        if "key" not in parser:
            yield data
            return

        key = parser["key"]
        rex = None
        if "regex" in parser:
            rex = re.compile(parser["regex"], flags=re.I)

        if key == "@" and mm_key is not None:
            yield {key: mm_key}
            return

        values = cls.get_value(data, key)
        if values is None:
            return

        if not parser.get("match_all", False):
            values = [values]

        for val in values:
            result_dict = OrderedDict()

            if rex:
                m = rex.search(val)
                if not m:
                    return
                if len(m.groups()) > 0:
                    val = m.groups()
                    if len(val) == 1:
                        val = val[0]

            urldecode = str(parser.get("urldecode", False)).lower()
            if urldecode in ("1", "yes", "true"):
                val = urllib.parse.unquote(val)
            elif urldecode == "twice":
                val = urllib.parse.unquote(
                    urllib.parse.unquote(val)
                )

            if "format" in parser:
                if parser["format"] == "as_list":
                    val = ", ".join(map(str, val))
                elif parser["format"] == "as_time":
                    try:
                        dt = datetime.datetime.fromtimestamp(val)
                    except:
                        dt = parse(val)
                    val = dt.isoformat()
            result_dict[key] = val

            yield result_dict
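
The parser argument is a small spec dict whose keys drive the branches above (key, regex, match_all, urldecode, format). A hypothetical spec, made up purely to illustrate the shape this method accepts:

example_parser = {
    "key": "url",            # field to read from the input data
    "regex": r"\?(.*)$",     # capture the query string portion
    "urldecode": "twice",    # decode the captured value two times
    "match_all": False,      # treat the value as a single entry
}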
Example #29
    def series_raceresults(self, season, raceweek):
        """ Gets races results of all races of season in specified raceweek """

        r = self.__req(ct.URL_SERIES_RACERESULTS, data={'seasonid': season,
                       'raceweek': raceweek})  # TODO no bounds?
        res = parse(r)
        header = res['m']
        results = res['d']
        results = format_results(results, header)
        return results
Example #30
def parse_environ(name, **default_vals):
    """
    same as parse() but you pass in an environment variable name that will be used
    to fetch the dsn

    name -- string -- the environment variable name that contains the dsn to parse
    **default_vals -- dict -- any values you want to have defaults for if they aren't in the dsn
    return -- ParseResult() tuple
    """
    return parse(os.environ[name], **default_vals)
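
A hypothetical way to exercise parse_environ; the environment variable name and DSN below are invented for illustration, and the extra keyword shows how a default fills a value missing from the DSN:

import os

os.environ["EXAMPLE_DSN"] = "postgres://user:secret@localhost/appdb"  # made-up DSN
config = parse_environ("EXAMPLE_DSN", port=5432)  # port is only a fallback if absent from the DSN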
Example #31
 def request(self, method: str, path: str = "", **kwargs):
     response = super().request(
         method, urllib.parse.urljoin(self.base_url.geturl(), path),
         **kwargs)
     response.parsed = lambda: parse(response)
     return response
Example #32
def rc_by_date(start, end):
    global df
    data_load()
    df = df[(df['Resolved'] > parse(start)) & (df['Resolved'] < parse(end))]
    #df = df_dt;
    return df.to_html()
Example #33
def load_schedule(filename):
    with open(filename, encoding='utf-8') as f:
        data = f.read()
        return parse(data)
Example #34
    def driver_search(self,
                      race_type=ct.RACE_TYPE_ROAD,
                      location=ct.LOC_ALL,
                      license=(ct.LIC_ROOKIE, ct.ALL),
                      irating=(0, ct.ALL),
                      ttrating=(0, ct.ALL),
                      avg_start=(0, ct.ALL),
                      avg_finish=(0, ct.ALL),
                      avg_points=(0, ct.ALL),
                      avg_incs=(0, ct.ALL),
                      active=False,
                      sort=ct.SORT_IRATING,
                      page=1,
                      order=ct.ORDER_DESC):
        """Search drivers using several search fields. A tuple represent a 
           range (i.e irating=(1000, 2000) gets drivers with irating 
           between 1000 and 2000). Use ct.ALL used in the lower or 
           upperbound of a range disables that limit. Returns a tuple 
           (results, total_results) so if you want all results you should 
           request different pages (using page) until you gather all
           total_results. Each page has 25 (ct.NUM_ENTRIES) results max."""

        lowerbound = ct.NUM_ENTRIES * (page - 1) + 1
        upperbound = lowerbound + ct.NUM_ENTRIES - 1
        search = 'null'
        friend = ct.ALL  # TODO
        studied = ct.ALL  # TODO
        recent = ct.ALL  # TODO

        active = int(active)
        # Data to POST
        data = {
            'custid': self.custid,
            'search': search,
            'friend': friend,
            'watched': studied,
            'country': location,
            'recent': recent,
            'category': race_type,
            'classlow': license[0],
            'classhigh': license[1],
            'iratinglow': irating[0],
            'iratinghigh': irating[1],
            'ttratinglow': ttrating[0],
            'ttratinghigh': ttrating[1],
            'avgstartlow': avg_start[0],
            'avgstarthigh': avg_start[1],
            'avgfinishlow': avg_finish[0],
            'avgfinishhigh': avg_finish[1],
            'avgpointslow': avg_points[0],
            'avgpointshigh': avg_points[1],
            'avgincidentslow': avg_incs[0],
            'avgincidentshigh': avg_incs[1],
            'lowerbound': lowerbound,
            'upperbound': upperbound,
            'sort': sort,
            'order': order,
            'active': active
        }

        total_results, drivers = 0, {}

        try:
            r = self.__req(ct.URL_DRIVER_STATS,
                           data=data,
                           cookie=self.last_cookie)
            res = parse(r)
            total_results = res['d']['32']

            header = res['m']
            f = res['d']['r'][0]
            if int(f['29']) == int(self.custid):  # 29 is custid
                drivers = res['d']['r'][1:]
            else:
                drivers = res['d']['r']
            drivers = format_results(drivers, header)

        except Exception as e:
            pprint(("Error fetching driver search data. Error:", e),
                   self.verbose)

        return drivers, total_results
Example #35
 def personal_best(self, custid=None, carid=0):
     """ Personal best times of driver (custid) using car 
         (carid. check self.CARS) set in official events."""
     r = self.__req(ct.URL_PERSONAL_BEST % (carid, custid),
                    cookie=self.last_cookie)
     return parse(r)
Example #36
 def yearly_stats(self, custid=None):
     """ Gets yearly stats (top5, top 10, etc.) of driver (custid)."""
     r = self.__req(ct.URL_YEARLY_STATS % (custid), cookie=self.last_cookie)
     # tofile(r)
     return parse(r)
Example #37
def solve():
    export_dir = pathlib.Path('export')
    conditions = parse(export_dir)
    print(''.join([conv(condition) for condition in conditions]))
Example #38
#iterate through tuple of RSS Feeds and look for pubDate
datereq = utc.localize(
    datereq
)  #localize the date requirement so we can compare it with the published date

i = 0
#int to name each item in list
feedList = []  #list to store all feed objects created

for x in rssfeeds:
    i = i + 1
    name = "r" + str(i)
    #make string name
    NewsFeed = feedparser.parse(x)  #parse newsfeed
    entry = NewsFeed.entries[0]  #get the most recent entry from the RSS feed
    dt = parse(entry.published)  #parse the date from the RSS feed pubDate format to datetime
    feedList.append(Feed(name, entry.title, dt, x))  #create object and add to feedList

if int(mindays) < 2:
    print("\nThese feeds had no activity for " + mindays + " day :")
else:
    print("\nThese feeds had no activity for " + mindays + " days :")

for feed in feedList:
    if feed.pubdate < datereq:  #compare published date to the required date for inactivity if before that then print info
        print("")
        print("Most Recent Article: " + feed.title)
        print("Date Published: " + str(feed.pubdate))
        print("RSS Feed URL: " + feed.url + "")
        print("")
Example #39
def main():
    url = 'https://sadovod.city/category/66'
    total_pages = get_total_pages(get_html(url, 1))
    for i in range(1, total_pages):
        print(i)
        parse(get_html(url, i))
                        fl.close()
                        total_time = time.time() - start
                        mb_sec = (os.path.getsize(o_file) /
                                  (1024 * 1024.0)) / total_time
                        print(f"{get_timenow()}: Speed: {mb_sec} MB/s")
                        print(f"{get_timenow()}: Total Time: {total_time} s")
                        temp_size = os.path.getsize(o_file)

        download_complete = temp_size == filesize_b

        if not download_complete:
            percent = temp_size / filesize_b * 100
            print(f"{get_timenow()}: Wrote  {temp_size} MB ({percent:.2f} %)")
            print(
                f"Download not completed somehow. "
                f"\n Restarting download from where we left off: ({temp_size/(1024 * 1024):.2f} MB)"
            )
        else:
            print(f"Download completed. Breaking loop. Wrote  {temp_size} MB")
            break

        cookie = get_request_cookies(inps)


if __name__ == '__main__':
    if len(sys.argv) == 1:
        sys.argv.append('-h')
    ### READ IN PARAMETERS FROM THE COMMAND LINE ###
    inps = parse()
    download(inps)
def parseStrDate(dateString):
    try:
        dateTimeObj = parse(dateString)
        return dateTimeObj
    except:
        return None
    def on_data(self, data):
        global f
        global filecnt
        global tweetcnt
        global chkFlag

        if tweetcnt >= numTweets and numTweets != 0:
            print("first")
            chkFlag = False
            return False

        if (filecnt >= 500):
            print("filecnt")
            chkFlag = False
            return False

        if (f.tell() >= 10485760):
            print("last")
            f.close()
            chkFlag = True
            filecnt += 1
            outputPath = dirName
            outputPath += '/'
            outputPath += 'twitter_data'
            outputPath += str(filecnt)
            outputPath += '.txt'
            f = open(outputPath, 'a')

        decoded = json.loads(data)

        username = str(decoded['user']['screen_name']).encode(
            "ascii", "ignore")
        userTweet = str(decoded['text']).encode("ascii", "ignore")
        userTweet = userTweet.replace('\n', ' ').replace('\t',
                                                         '').replace('\r', '')
        userTweetTime = str(decoded['created_at'])  #gets timestamp
        userLocation = str(decoded['user']['location']).encode(
            "ascii", "ignore")
        userCoords = str(decoded['coordinates']).encode("ascii", "ignore")
        userURLS = str(decoded['entities']['urls']).encode("ascii", "ignore")
        userData = "Date:" + userTweetTime + " Coords:" + userCoords[
            36:-1] + " User:"******" Text:" + userTweet

        userData += " Hashtags:"
        userHashtags = decoded['entities']['hashtags']
        if (userHashtags != "[]"):
            tmp = decoded['text']
            for Hashtags in userHashtags:
                userHashtags = str(Hashtags['text']).encode("ascii", "ignore")
                userData += userHashtags + " "

        #url
        pageTitle = None
        userData += " URL:"
        if userURLS != "[]":
            expanded_url = str(
                decoded['entities']['urls'][0]['expanded_url']).encode(
                    "ascii", "ignore")
            userData += expanded_url

            try:
                page = urllib.request.urlopen(expanded_url)
                p = parse(page)

                pageT = p.find(".//title")
                if (pageT != None):
                    pageTitle = str(p.find(".//title").text).encode(
                        "ascii", "ignore")
            except urllib.error.HTTPError as err:
                if err.code == 404:
                    print("Page not found!")
                elif err.code == 403:
                    print("Access denied!")
                else:
                    print("Error:", err.code)
            except urllib.error.URLError as err:
                print("URL error:", err.reason)
            except BadStatusLine:
                print("Could not fetch URL")

        userData += " Title:"
        if (pageTitle != None):

            pageTitle = re.sub('[^A-Za-z0-9]+', ' ', pageTitle)
            userData += pageTitle

        tweetcnt += 1
        print('Tweet:', tweetcnt, ' F.size = ', f.tell(), ' on file:', filecnt)
        userData += "\n"
        print(userData)
        f.write(userData)

        return True
Example #43
                      dest="minimize_boolean_attributes",
                      help="minimize boolean attributes")

    parser.add_option("", "--use-trailing-solidus", action="store_true",
                      default=False, dest="use_trailing_solidus",
                      help="use trailing solidus")

    parser.add_option("", "--space-before-trailing-solidus",
                      action="store_true", default=False,
                      dest="space_before_trailing_solidus",
                      help="add space before trailing solidus")

    parser.add_option("", "--escape-lt-in-attrs", action="store_true",
                      default=False, dest="escape_lt_in_attrs",
                      help="escape less than signs in attribute values")

    parser.add_option("", "--escape-rcdata", action="store_true",
                      default=False, dest="escape_rcdata",
                      help="escape rcdata element values")

    parser.add_option("", "--sanitize", action="store_true", default=False,
                      dest="sanitize", help="sanitize")

    parser.add_option("-l", "--log", action="store_true", default=False,
                      dest="log", help="log state transitions")

    return parser

if __name__ == "__main__":
    parse()
    data = {
        'working_hours': working_hours,
        'info': info,
        'ratings_histogram': ratings_histogram,
        'name': name,
        'phone': phone,
        'ratings': ratings,
        'address': address,
        'health_rating': health_rating,
        'price_range': price_range,
        'claimed_status': claimed_status,
        'reviews': reviews,
        'category': category,
        'website': website,
        'latitude': latitude,
        'longitude': longitude,
        'url': url
    }
    return data


if __name__ == "__main__":
    argparser = argparse.ArgumentParser()
    argparser.add_argument('url', help='yelp bussiness url')
    args = argparser.parse_args()
    url = args.url
    scraped_data = parse(url)
    yelp_id = url.split('/')[-1]
    with open("scraped_data_%s.json" % yelp_id, 'w') as fp:
        json.dump(scraped_data, fp, indent=4)
Example #45
 def career_stats(self, custid=None):
     """ Gets career stats (top5, top 10, etc.) of driver (custid)."""
     r = self.__req(ct.URL_CAREER_STATS % (custid), cookie=self.last_cookie)
     return parse(r)[0]
Example #46
#import xml.etree.ElementTree as ET
from xml.etree.ElementTree import parse
from urllib.request import urlopen
import ssl

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

#Actual Data
#Prompt for URL for xml file and parse the XML

link = input('Enter - ')
url = urlopen(link)
#url = urlopen('http://py4e-data.dr-chuck.net/comments_42.xml')
xmldoc = parse(url)

print("Retrieving ", link)

#Determine the number of count entries in xml tree
counts = xmldoc.findall('.//count')
#counts = xml_tree.findall('./comments/comment/count')
print("Count: ", (len(counts)))

#This will be a running tally of numbers extracted from xml
count_list = []

for item in xmldoc.iterfind('comments/comment'):
    count_number = int(item.findtext('count'))
    count_list.append(count_number)
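
The snippet stops after collecting the counts; a small helper like the following (hypothetical, in the spirit of the py4e exercise this URL comes from) would report the tally:

def report_counts(count_list):
    # Summarise the numbers pulled out of the <count> elements.
    print("Count:", len(count_list))
    print("Sum:", sum(count_list))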
Example #47
 def cars_driven(self, custid=None):
     """ Gets list of cars driven by driver (custid)."""
     r = self.__req(ct.URL_CARS_DRIVEN % (custid), cookie=self.last_cookie)
     # tofile(r)
     return parse(r)
Example #48
    def parse_product(self, response):

        tmp = []
        for p in response.xpath("//table//tr//td[2]"):
            tmp.append(p)

        title = tmp[0].xpath("./p/text()").extract()[0]
        url = urllib.parse.urljoin(self.download_path,
                                   tmp[3].xpath("./a/@href").extract()[0])

        def parse(title):

            print(title)
            product = version = date = None

            tmp = title.split(' ')
            product = tmp[0]

            if len(tmp) == 2:
                #MR814v1_070807 upgrade package
                if '_' in tmp[0]:
                    tmp2 = tmp[0].split('_')
                    version = tmp2[0]
                    date = tmp2[1][:6]
                #MWR300T V1(081210) standard edition
                elif tmp[1][0] in ['v', 'V']:
                    pass
                else:
                    tmp2 = tmp[1].split('_')
                    version = tmp2[0]
                    date = tmp2[1][:6]

            elif len(tmp) == 3:
                tmp2 = tmp[1].split('_')
                version = tmp2[0]
                date = tmp2[1]

            if version:
                if version[0] not in ['v', 'V']:
                    if 'v' in product:
                        t = product.split('v')
                        product = t[0]
                        version = t[1]

            #MR814v1_070807 upgrade package
            if product.count('_'):
                tmp = product.split('_')
                product = tmp[0]
            if product.count('v'):
                product = product.split('v')[0]
            elif product.count('V'):
                product = product.split('V')[0]

            return product, version, date

        product, version, date = parse(title)

        item = FirmwareLoader(item=FirmwareImage())
        item.add_value("url", url),
        item.add_value("product", product),
        #item.add_value("date", date),
        #item.add_value("version", version),
        item.add_value("vendor", self.vendor),
        item.add_value("description", title)

        yield item.load_item()
Example #49
 def lastrace_stats(self, custid=None):
     """ Gets stats of last races (10 max?) of driver (custid)."""
     r = self.__req(ct.URL_LASTRACE_STATS % (custid),
                    cookie=self.last_cookie)
     return parse(r)
Example #50
def is_date(string):
    try:
        parse(string, fuzzy=False)
        return True
    except ValueError:
        return False
Example #51
    def results_archive(self,
                        custid=None,
                        race_type=ct.RACE_TYPE_ROAD,
                        event_types=ct.ALL,
                        official=ct.ALL,
                        license_level=ct.ALL,
                        car=ct.ALL,
                        track=ct.ALL,
                        series=ct.ALL,
                        season=(2014, 1, ct.ALL),
                        date_range=ct.ALL,
                        page=1,
                        sort=ct.SORT_TIME,
                        order=ct.ORDER_DESC):
        """ Search race results using various fields. Returns a tuple 
            (results, total_results) so if you want all results you should 
            request different pages (using page). Each page has 25 
            (ct.NUM_ENTRIES) results max."""

        format_ = 'json'
        lowerbound = ct.NUM_ENTRIES * (page - 1) + 1
        upperbound = lowerbound + ct.NUM_ENTRIES - 1
        #  TODO carclassid, seriesid in constants
        data = {
            'format': format_,
            'custid': custid,
            'seriesid': series,
            'carid': car,
            'trackid': track,
            'lowerbound': lowerbound,
            'upperbound': upperbound,
            'sort': sort,
            'order': order,
            'category': race_type,
            'showtts': 0,
            'showraces': 0,
            'showquals': 0,
            'showops': 0,
            'showofficial': 0,
            'showunofficial': 0,
            'showrookie': 0,
            'showclassa': 0,
            'showclassb': 0,
            'showclassc': 0,
            'showclassd': 0,
            'showpro': 0,
            'showprowc': 0,
        }
        # Events
        ev_vars = {
            ct.EVENT_RACE: 'showraces',
            ct.EVENT_QUALY: 'showquals',
            ct.EVENT_PRACTICE: 'showops',
            ct.EVENT_TTRIAL: 'showtts'
        }
        if event_types == ct.ALL:
            event_types = (ct.EVENT_RACE, ct.EVENT_QUALY, ct.EVENT_PRACTICE,
                           ct.EVENT_TTRIAL)

        for v in event_types:
            data[ev_vars[v]] = 1
        # Official, unofficial
        if official == ct.ALL:
            data['showofficial'] = 1
            data['showunofficial'] = 1
        else:
            if ct.EVENT_UNOFFICIAL in official:
                data['showunofficial'] = 1
            if ct.EVENT_OFFICIAL in official:
                data['showofficial'] = 1

        # Season
        if date_range == ct.ALL:
            data['seasonyear'] = season[0]
            data['seasonquarter'] = season[1]
            if season[2] != ct.ALL:
                data['raceweek'] = season[2]
        else:
            # Date range
            tc = lambda s:\
                time.mktime(datetime.datetime.strptime(s, "%Y-%m-%d").
                            timetuple()) * 1000
            data['starttime_low'] = tc(date_range[0])  # multiplied by 1000
            data['starttime_high'] = tc(date_range[1])

        # License levels
        lic_vars = {
            ct.LIC_ROOKIE: 'showrookie',
            ct.LIC_A: 'showclassa',
            ct.LIC_B: 'showclassb',
            ct.LIC_C: 'showclassc',
            ct.LIC_D: 'showclassd',
            ct.LIC_PRO: 'showpro',
            ct.LIC_PRO_WC: 'showprowc'
        }

        if license_level == ct.ALL:
            license_level = (ct.LIC_ROOKIE, ct.LIC_A, ct.LIC_B, ct.LIC_C,
                             ct.LIC_D, ct.LIC_PRO, ct.LIC_PRO_WC)
        for v in license_level:
            data[lic_vars[v]] = 1
        r = self.__req(ct.URL_RESULTS_ARCHIVE,
                       data=data,
                       cookie=self.last_cookie)
        res = parse(r)
        total_results, results = 0, []
        if len(res['d']):
            total_results = res['d']['46']
            results = res['d']['r']
            header = res['m']
            results = format_results(results, header)

        return results, total_results
Example #52
def parse_iaga(lines, iagacode=None):
    '''
    KyotoWDC uses two format types: WDC, which is data specific, and 
    IAGA-2002, which is general for all data types.  This function is
    a general reader for this format.  It returns a dictionary of vectors,
    each corresponding to a column from the file.

    'lines' is simply a list of lines from the IAGA-formatted file.
    'iagacode', if given, should be a string containing the IAGA code for the
    file contents.  If given, this function will raise an
    exception if iagacode does not match the file's code.  This is
    useful for ensuring the correct data values are located in this file.
    '''

    from dateutil.parser import parse

    # Begin by parsing header; ensuring the correct file format.
    fmt=(lines.pop(0)).split()
    if (fmt[0]!='Format') or (fmt[1]!='IAGA-2002'):
        raise Exception('Data is not in IAGA-2002 format.')

    # Parse mandatory IAGA header lines.
    source=(lines.pop(0)).split()[1]
    lines.pop(0)
    code=(lines.pop(0)).split()[2]
    for i in range(8):
        lines.pop(0)
    
    # Check Iaga Code as necessary.
    if iagacode:
        if iagacode != code:
            raise Exception("IAGA Code does not match required code.")

    # Loop through and count optional header lines.
    nHead=12
    while True:
        line=lines.pop(0)
        if line[:2]!=' #': break
        nHead+=1

    # Parse column labels.  We don't need time or DOY.
    parts=line.lower().split()[3:-1]
    data={'time':[], 'doy':[]}
    for name in parts:
        data[name]=[]

    # Read all data.
    for l in lines:
        if l[-2]=='|':continue # skip repeat headers.

        p=l.split()
        data['time'].append(parse(' '.join(p[0:2])))
        data['doy'].append(int(p[2]))
        
        for i,name in enumerate(parts):
            data[name].append(float(p[i+3]))

    # Convert to dmarrays.
    for name in data:
        data[name]=dmarray(data[name])

    return data
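
A hedged usage sketch for parse_iaga: read an IAGA-2002 file into a list of lines and hand them over, optionally checking the IAGA code. The filename and code below are placeholders:

def load_iaga_file(path, expected_code=None):
    # Read the whole file as lines, as parse_iaga expects, and return its dict of columns.
    with open(path) as f:
        lines = f.readlines()
    return parse_iaga(lines, iagacode=expected_code)

# e.g. data = load_iaga_file("kyoto_dst_2002.txt", expected_code="DST")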
Example #53
      f = open(f, "rb")
      parser.ParseFile(f)
      f.close()
    else:
      parser.ParseFile(f)
      
  except Exception as e:
    raise OwlReadyOntologyParsingError("OWL/XML parsing error in file %s, line %s, column %s." % (getattr(f, "name", "???"), parser.CurrentLineNumber, parser.CurrentColumnNumber)) from e
  
  return nb_triple



    
def _rindex(l):
  i = len(l) - 1
  while l[i] != "(": i -= 1
  return i

    


if __name__ == "__main__":
  filename = sys.argv[-1]

  import time
  t = time.time()
  nb_triple = parse(filename)
  t = time.time() - t
  print("# %s triples read in %ss" % (nb_triple, t), file = sys.stderr)
Example #54
def main():
    if not os.path.exists(datadir):
        os.makedirs(datadir)

    parse()
Example #55
    def search(self, search_string, season=None, episode=None):

        if season and episode:
            searches = self.se_ep(search_string, season, episode)
        else:
            searches = [search_string]

        # get token for api
        url = '{}?get_token=get_token&app_id=tvoverlord'.format(self.baseurl)
        try:
            r = requests.get(url)
        except requests.exceptions.ConnectionError:
            return []

        if r.status_code == 403:
            self.url = url
            return []

        j = r.json()

        token = j['token']

        torrents = []
        count = 0
        loop_number = 0
        for search in searches:
            # the torrentapi only allows one query every two seconds
            if count > 0:
                time.sleep(2)
            count += 1

            search_tpl = '{}?mode=search&search_string={}&token={}&format=json_extended&sort=seeders&limit=100&app_id=tvoverlord'
            search_string = urllib.parse.quote(search)
            url = search_tpl.format(self.baseurl, search_string, token)

            try:
                loop_number += 1
                self.logger.info('%s[%s]@%s via "%s"' % (self.job_id, self.shortname, loop_number, url))
                r = requests.get(url)
            except requests.exceptions.ConnectionError:
                # can't connect, go to next url
                continue

            results = r.json()
            if 'error_code' in results.keys() and results['error_code'] == 20:
                continue  # no results found

            try:
                shows = results['torrent_results']
            except KeyError:
                # no results
                continue

            for show in shows:
                torrent = Torrent()
                torrent.title = show['title']
                torrent.date = parse(show['pubdate'].split(' ')[0])
                torrent.size = int(show['size'])
                torrent.seeders = int(show['seeders'])
                torrent.magnet = show['download']
                torrent.tracker = self.shortname
                torrents.append(torrent)

            self.logger.info('%s[%s]@%s found %s result(s)' % (self.job_id, self.shortname, loop_number,
                                                                  len(torrents)))

            if len(torrents) != 0:
                return torrents

        # We got this far with no results
        self.logger.info('%s[%s] exiting without any results' % (self.job_id, self.shortname))
        return torrents
Example #56
        def _tokenize(s):
            """Removes conditional macros and splits string on macro boundaries"""
            def parse(inp):
                tree = []
                text = ''
                macro = ''
                buf = ''
                escape = False
                while inp:
                    c = inp.pop(0)
                    if c == '%':
                        c = inp.pop(0)
                        if c == '%':
                            text += c
                        elif c == '{':
                            if text:
                                tree.append(('t', text))
                                text = ''
                            while inp and c not in ':}':
                                c = inp.pop(0)
                                buf += c
                            if c == ':':
                                tree.append(('c', buf[:-1], parse(inp)))
                                buf = ''
                            elif c == '}':
                                tree.append(('m', buf[:-1]))
                                buf = ''
                        elif c == '(':
                            if text:
                                tree.append(('t', text))
                                text = ''
                            tree.append(('s', None, parse(inp)))
                        else:
                            if text:
                                tree.append(('t', text))
                                text = ''
                            while inp and (c.isalnum() or c == '_'):
                                c = inp.pop(0)
                                macro += c
                            tree.append(('m', macro))
                            macro = ''
                    elif c == '$':
                        text += c
                        c = inp.pop(0)
                        if c == '{':
                            text += c
                            escape = True
                    elif c == '}':
                        if escape:
                            text += c
                            escape = False
                        else:
                            if text:
                                tree.append(('t', text))
                            inp.append(c)
                            return tree
                    elif c == ')':
                        if text:
                            tree.append(('t', text))
                        inp.append(c)
                        return tree
                    else:
                        text += c
                if text:
                    tree.append(('t', text))
                return tree

            def traverse(tree):
                result = []
                for node in tree:
                    if node[0] == 't':
                        # split text nodes on usual separators
                        result.extend([t for t in re.split(r'(\.|-|_)', node[1]) if t])
                    elif node[0] == 'm':
                        m = '%{{{}}}'.format(node[1])
                        if MacroHelper.expand(m):
                            result.append(m)
                    elif node[0] == 'c':
                        if MacroHelper.expand('%{{{}:1}}'.format(node[1])):
                            result.extend(traverse(node[2]))
                    elif node[0] == 's':
                        # ignore shell expansions, push nonsensical value
                        result.append('@')
                return result

            inp = list(s)
            tree = parse(inp)
            return traverse(tree)
Example #57
 def parse(inp):
     tree = []
     text = ''
     macro = ''
     buf = ''
     escape = False
     while inp:
         c = inp.pop(0)
         if c == '%':
             c = inp.pop(0)
             if c == '%':
                 text += c
             elif c == '{':
                 if text:
                     tree.append(('t', text))
                     text = ''
                 while inp and c not in ':}':
                     c = inp.pop(0)
                     buf += c
                 if c == ':':
                     tree.append(('c', buf[:-1], parse(inp)))
                     buf = ''
                 elif c == '}':
                     tree.append(('m', buf[:-1]))
                     buf = ''
             elif c == '(':
                 if text:
                     tree.append(('t', text))
                     text = ''
                 tree.append(('s', None, parse(inp)))
             else:
                 if text:
                     tree.append(('t', text))
                     text = ''
                 while inp and (c.isalnum() or c == '_'):
                     c = inp.pop(0)
                     macro += c
                 tree.append(('m', macro))
                 macro = ''
         elif c == '$':
             text += c
             c = inp.pop(0)
             if c == '{':
                 text += c
                 escape = True
         elif c == '}':
             if escape:
                 text += c
                 escape = False
             else:
                 if text:
                     tree.append(('t', text))
                 inp.append(c)
                 return tree
         elif c == ')':
             if text:
                 tree.append(('t', text))
             inp.append(c)
             return tree
         else:
             text += c
     if text:
         tree.append(('t', text))
     return tree
Example #58
 def driver_counts(self):
     """ Gets list of connected myracers and notifications. """
     r = self.__req(ct.URL_DRIVER_COUNTS, cookie=self.last_cookie)
     return parse(r)
Example #59
        # print(final)
    except BaseException as e:
        print(e)
def parse(uid):
    #set the Referer and request headers
    Referer='https://m.weibo.cn/u/{}'.format(uid)
    headers_1 = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0",
        "Referer":Referer
    }
    #use the Firefox network-capture tool to find the real request URL and analyse its parameters; the UID and page need to be filled in
    for i in range(1,11):
        url = 'https://m.weibo.cn/api/container/getIndex?type=uid&value={}&containerid=107603{}&page={}'.format(uid,uid,i)
        #after fetching the page, convert the JSON to a dict and extract the information
        webdata = session.get(url, headers=headers_1).text
        data = json.loads(webdata)
        news = data['data']['cards']
        for new in news:
            try:
                info = new['mblog']['text']
                #strip all HTML tags
                import re
                dr = re.compile(r'\<.*?\>', re.S)
                dd = dr.sub('', info)
                print(dd)
            except BaseException as e:
                print(e)
login(username='******',password='******')
parse('1173544654')

Example #60
    def authenticate(self, request):
        if not settings.HAUKI_SIGNED_AUTH_PSK:
            return None

        params = get_auth_params(request)

        if not len(params):
            return None

        if not all([params.get(k) for k in REQUIRED_AUTH_PARAM_NAMES]):
            return None

        data_string = join_params(params)
        calculated_signature = calculate_signature(data_string)

        if not compare_signatures(params["signature"], calculated_signature):
            raise exceptions.AuthenticationFailed(_("Invalid signature"))

        try:
            created_at = parse(params["created_at"])
            try:
                if created_at > timezone.now():
                    raise exceptions.AuthenticationFailed(
                        _("Invalid created_at"))
            except TypeError:
                raise exceptions.AuthenticationFailed(_("Invalid created_at"))
        except ValueError:
            raise exceptions.AuthenticationFailed(_("Invalid created_at"))

        try:
            valid_until = parse(params["valid_until"])
            try:
                if valid_until < timezone.now():
                    raise exceptions.AuthenticationFailed(
                        _("Invalid valid_until"))
            except TypeError:
                raise exceptions.AuthenticationFailed(_("Invalid valid_until"))
        except ValueError:
            raise exceptions.AuthenticationFailed(_("Invalid valid_until"))

        # TODO: Add separate PSKs for different integrations and only allow access
        #       to users initially from the same integration. Also Only allow
        #       using organisations from the same integration.
        try:
            user = User.objects.get(username=params["username"])
        except User.DoesNotExist:
            user = User()
            user.set_unusable_password()
            user.username = params["username"]
            user.save()

        if not user.is_active:
            raise exceptions.AuthenticationFailed(
                _("User inactive or deleted."))

        if params.get("organization"):
            try:
                organization = Organization.objects.get(
                    id=params["organization"])
                users_organizations = user.organization_memberships.all()

                if organization not in users_organizations:
                    user.organization_memberships.add(organization)
            except Organization.DoesNotExist:
                # TODO: Should we raise exception here
                pass

        return user, None