Code example #1
File: scrapper.py Project: gerald-kim/snowball
def parse_naver_company(code):
    url = NAVER_COMPANY + code
    print('네이버 {}'.format(url))
    tree = tree_from_url(url)

    element = tree.xpath(
        '//*[@id="pArea"]/div[1]/div/table/tr[3]/td/dl/dt[2]/b')
    if not element:
        print('수집 실패')
        return False
    bps = parse_int(element[0].text)
    print('BPS: {}'.format(bps))

    element = tree.xpath(
        '//*[@id="pArea"]/div[1]/div/table/tr[3]/td/dl/dt[6]/b')
    if element:
        dividend_rate = parse_float(element[0].text)
        print('배당률: {}'.format(dividend_rate))
    else:
        print('배당 수집 실패')
        return False

    stock = {
        'code': code,
        'bps': bps,
        'dividend_rate': dividend_rate,
        'use_fnguide': False,
    }
    stock = db.save_stock(stock)
    return stock
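
Note: nearly every example in this collection relies on small coercion helpers named parse_int and parse_float that the excerpts never define. Judging purely from how they are called — scraped strings such as '1,234' or '3.5%' go in, a number comes out, None signals failure, and examples #4, #9 and #20 pass a second positional argument that looks like a fallback value — a minimal sketch could be the following. The signatures, the default argument, and the cleanup rules are all assumptions:

import re

def parse_int(text, default=None):
    # Coerce scraped text like '1,234' to an int; fall back to `default`
    # on failure (an assumed contract, inferred from the call sites).
    if text is None:
        return default
    try:
        return int(re.sub(r'[^\d\-]', '', str(text)))
    except ValueError:
        return default

def parse_float(text, default=None):
    # Coerce scraped text like '3.5%' or '1,234.5' to a float.
    if text is None:
        return default
    try:
        return float(re.sub(r'[^\d.\-]', '', str(text).replace(',', '')))
    except ValueError:
        return default

(Examples #11 and #14 use binary variants of the same names that decode byte slices instead; see the sketch after example #11.)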
Code example #2
    def quizzes():
        data = json.loads(request.data)
        previous_questions = data.get("previous_questions")
        category_id = parse_int((data.get("quiz_category") or {}).get("id"))

        if previous_questions is None or category_id is None:
            print("previous_questions", previous_questions)
            print("category_id", category_id)
            abort(400)

        questions = None
        if category_id == 0:
            questions = Question.query.all()
        else:
            questions = Question.query.filter_by(category=category_id).all()

        questions = [q.format() for q in questions]
        previous_set = set(previous_questions)
        available = [q for q in questions if q["id"] not in previous_set]

        new_question = None
        if len(available) > 0:
            new_question = random.choice(available)

        return jsonify({"success": True, "question": new_question}), 200
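
For reference, the quizzes handler above expects a JSON body carrying previous_questions and a quiz_category object, and replies with a random question that has not been asked yet. A client call might look like this sketch; the route path and port are assumptions, since the route decorator is not part of the excerpt:

import requests

resp = requests.post(
    'http://localhost:5000/quizzes',    # hypothetical URL for the route
    json={
        'previous_questions': [2, 7],   # IDs of questions already asked
        'quiz_category': {'id': 0},     # id 0 selects all categories
    },
)
print(resp.json())  # {'success': True, 'question': {...}}; question is None when exhausted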
Code example #3
def scrape_attraction_page(url):
    print "Open %s" % url
        
    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'html.parser')

    grade_labels = [u"Utmärkt", u"Mycket bra", u"Medelmåttigt", u"Dåligt", u"Hemskt"]
    grade_tags = soup.find_all("div", class_="valueCount")
    grade_values = [parse_int(x.text) for x in grade_tags]
    data = dict(zip(grade_labels, grade_values))


    breadcrumb_tags = soup.find("ul", {"id": "BREADCRUMBS"}).find_all("li")
    try:
        data["county"] = [x.text.strip() for x in breadcrumb_tags if u"län" in x.text][0]
    except IndexError:
        data["county"] = None

    data["city"] = soup.find("div", {"class": "slim_ranking"}).find("a").text\
        .replace(u"saker att göra i","").strip()

    tags = [x.text for x in soup.find("div", {"class": "heading_details"}).find_all("a")]
    data["tags"] = "|".join(tags)

    print(data)

    return data
Code example #4
File: parsers.py Project: Js41637/Overwatch-API
def parse_heroes(parsed):
    """Parses the playtime for all the heroes"""
    playtimes = {'quickplay': [], 'competitive': []}
    playtimestats = parsed.xpath(".//div[@data-category-id='overwatch.guid.0x0860000000000021']")
    for i, item in enumerate(playtimestats):
        built_heroes = []
        heroes = item.xpath(".//div[@class='bar-text']")
        for j, hero in enumerate(heroes):
            htime = hero.find(".//div[@class='description']").text
            # If the first hero has no playtime then we can assume that none have been played
            # and that they haven't played comp mode so we will ignore all the rest
            if htime == '--':
                if j == 0 and i == 1:
                    break
                else:
                    htime = '0'
            hname = hero.find(".//div[@class='title']").text
            cname = hname.replace(".", "").replace(": ", "").replace(u"\xfa", "u").replace(u'\xf6', 'o')
            time = utils.parse_int(htime, False)

            built_heroes.append({'name': hname, 'time': time, 'extended': '/hero/' + cname.lower()})

        if i == 0:
            playtimes['quickplay'] = built_heroes
        else:
            playtimes['competitive'] = built_heroes

    return playtimes
Code example #5
    async def run(self):
        print(f'{self.__class__.__name__}: running')

        msg = await self.receive(timeout=30)  # wait up to 30 seconds for a message

        if not msg:
            print(
                f'{self.__class__.__name__}: received no message, terminating!'
            )
            return

        reply = msg.make_reply()
        parsed_msg = msg.body.lower().split(" ")

        if len(parsed_msg) == 2 and parse_float(parsed_msg[0]) and parse_float(
                parsed_msg[1]):
            print(f'{self.__class__.__name__}: got param!')

            length = float(parsed_msg[0])
            max_vel = float(parsed_msg[1])

            self.agent.track.append((length, max_vel))
            reply.body = "Got Your params: length='{}', max_vel='{}'".format(
                self.agent.track[-1][0], self.agent.track[-1][1])
            self.agent.track_length += length
            next_state = COLLECTING_PARAMS
        elif len(parsed_msg) == 1 and parsed_msg[0] == 'finish':
            print(f'{self.__class__.__name__}: finishing!')

            reply.body = "Finished passing parameters! Broadcasting race..."
            next_state = SUBSCRIBING_TO_DRIVER
        elif len(parsed_msg) == 2 and parsed_msg[0] == 'laps' and parse_int(
                parsed_msg[1]):
            print(f'{self.__class__.__name__}: got laps number!')

            self.agent.laps = int(parsed_msg[1])

            reply.body = "Got Your laps number: laps='{}'".format(
                self.agent.laps)
            next_state = COLLECTING_PARAMS
        elif len(parsed_msg) == 1 and parsed_msg[0] == "default":
            self.agent.track.append((50, 10))
            self.agent.track.append((40, 15))
            self.agent.track.append((30, 5))
            self.agent.track.append((70, 20))
            self.agent.track.append((10, 3))
            self.agent.track_length = 200
            self.agent.laps = 5
            reply.body = "Finished passing parameters! Broadcasting race..."
            next_state = SUBSCRIBING_TO_DRIVER
        else:
            reply.body = paramsHelpMessage
            next_state = COLLECTING_PARAMS

        await self.send(reply)

        self.set_next_state(next_state)
Code example #6
File: matches.py Project: evrimulgen/iddaa-bets
    def __init__(self, md, weekid):  # md is match_data
        self.weekID = weekid
        self.matchID = int(md[10])
        self.detailID = int(md[0])
        self.datetime = datetime.strptime(md[7] + " " + md[6],
                                          '%d.%m.%Y %H:%M')
        self.league = md[26]
        self.team_1 = md[1]
        self.team_2 = md[3]
        self.mbs = parse_int(md[13])
        self.iy_goals_1 = parse_int(md[11])
        self.iy_goals_2 = parse_int(md[12])
        if self.iy_goals_1 is None or self.iy_goals_2 is None:
            self.iy_goals_1 = None
            self.iy_goals_2 = None
            self.ms_goals_1 = None
            self.ms_goals_2 = None
        else:
            self.ms_goals_1 = parse_int(md[8])
            self.ms_goals_2 = parse_int(md[9])

        self.was_played = self.ms_goals_1 is not None
        self.h1 = 0 if md[14] == '' else int(md[14])
        self.h2 = 0 if md[15] == '' else int(md[15])
        self.ratios = []
        res = {}
        res['mac'] = [parse_float(x) for x in md[16:19]]
        res['ilk'] = [parse_float(x) for x in md[33:36]]
        res['han'] = [parse_float(x) for x in md[36:39]]
        res['kar'] = [parse_float(x) for x in md[39:41]]
        res['cif'] = [parse_float(x) for x in md[19:22]]
        res['iy'] = [parse_float(x) for x in md[42:44]]
        res['au1'] = [parse_float(x) for x in md[44:46]]
        res['au2'] = [parse_float(x) for x in md[22:24]]
        res['au3'] = [parse_float(x) for x in md[46:48]]
        res['top'] = [parse_float(x) for x in md[29:33]]

        if self.was_played:
            self.results = get_results(self.iy_goals_1, self.iy_goals_2,
                                       self.ms_goals_1, self.ms_goals_2,
                                       self.h1, self.h2)

        self.ratios = concat([res[bet] for bet in BET_ORDER])
        if self.league != 'DUEL':
            self.fetch_details()
Code example #7
    def __init__(self, md, weekid): # md is match_data
        self.weekID = weekid
        self.matchID = int(md[10])
        self.detailID = int(md[0])
        self.datetime = datetime.strptime(md[7] + " " + md[6], '%d.%m.%Y %H:%M')
        self.league = md[26]
        self.team_1 = md[1]
        self.team_2 = md[3]
        self.mbs = parse_int(md[13])
        self.iy_goals_1 = parse_int(md[11])
        self.iy_goals_2 = parse_int(md[12])
        if self.iy_goals_1 is None or self.iy_goals_2 is None:
            self.iy_goals_1 = None
            self.iy_goals_2 = None
            self.ms_goals_1 = None
            self.ms_goals_2 = None
        else:
            self.ms_goals_1 = parse_int(md[8])
            self.ms_goals_2 = parse_int(md[9])

        self.was_played = self.ms_goals_1 is not None
        self.h1 = 0 if md[14] == '' else int(md[14])
        self.h2 = 0 if md[15] == '' else int(md[15])
        self.ratios = []
        res = {}
        res['mac'] = [parse_float(x) for x in md[16:19]]
        res['ilk'] = [parse_float(x) for x in md[33:36]]
        res['han'] = [parse_float(x) for x in md[36:39]]
        res['kar'] = [parse_float(x) for x in md[39:41]]
        res['cif'] = [parse_float(x) for x in md[19:22]]
        res['iy'] = [parse_float(x) for x in md[42:44]]
        res['au1'] = [parse_float(x) for x in md[44:46]]
        res['au2'] = [parse_float(x) for x in md[22:24]]
        res['au3'] = [parse_float(x) for x in md[46:48]]
        res['top'] = [parse_float(x) for x in md[29:33]]
        
        if self.was_played:
            self.results = get_results(self.iy_goals_1, self.iy_goals_2,
                            self.ms_goals_1, self.ms_goals_2, self.h1, self.h2)

        self.ratios = concat([res[bet] for bet in BET_ORDER])
        if self.league != 'DUEL':
            self.fetch_details()
Code example #8
def get_attractions_from_list(url):
    print "Open %s" % url
    attractions = []
    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'html.parser')

    list_items = soup.find("div", { "id": "FILTERED_LIST"})\
        .find_all("div", {"class": "entry"})

    try:
        next_page = BASE_URL + soup.find("a", class_="next")["href"]
    except (TypeError, KeyError):  # no "next" link on the last page
        next_page = None

    for li in list_items:
        attraction = {}
        title_tag = li.find("div", { "class": "property_title" }).find("a")
        attraction["name"] = title_tag.text
        attraction["url"] = BASE_URL + title_tag["href"]
        attraction["rank"] = parse_int(li.find("div", class_="popRanking").text.strip().split(" ")[1])
        try:
            attraction["score"] = parse_float(li.find("img", class_="sprite-ratings")["alt"].split(" ")[0])
        except TypeError:
            attraction["score"] = None

        try:
            attraction["n_reviews"] = parse_int(li.find("span", class_="more").text.strip().split(" ")[0])
        except AttributeError:
            attraction["n_reviews"] = None
        print(attraction)
        attractions.append(attraction)

    print("Found %s attractions" % len(attractions))

    return {
        "data": attractions,
        "next_page_url": next_page,
    }
Code example #9
File: parsers.py Project: Js41637/Overwatch-API
def parse_game_stats(parsed):
    """Parses all the general game stats"""
    # Pre-fill both modes so that, if the stats are missing for some reason, the empty objects are still returned
    data = {
        "quickplay": {"overall_stats": {}, "game_stats": {}, "featured_stats": []},
        "competitive": {"overall_stats": {}, "game_stats": {}, "featured_stats": []}
    }
    stats = parsed.xpath(".//div[@data-group-id='stats' and @data-category-id='0x02E00000FFFFFFFF']")

    if len(stats) == 1:
        data["competitive"]["is_empty"] = True

    for i, item in enumerate(stats):
        overall_stats, game_stats, average_stats, featured_stats = {}, {}, {}, []

        # Fetch Game Stats
        for subbox in item:
            stats = subbox.findall(".//tbody/tr")
            for stat in stats:
                name, value = stat[0].text.lower().replace(" ", "_").replace("_-_", "_"), stat[1].text
                amount = utils.parse_int(value, False)
                if '_avg' in name.lower():
                    # Don't include average stats in the game_stats, use them for the featured stats section
                    average_stats[name.replace("_avg", "")] = amount
                else:
                    game_stats[name] = amount

        # Manually add KPD
        if 'eliminations' in game_stats and 'deaths' in game_stats:
            game_stats["kpd"] = round(game_stats["eliminations"] / game_stats["deaths"], 2)
        else:
            game_stats["kpd"] = None

        overall_stats = parse_overall_stats(game_stats, average_stats)

        # Generate Featured Stats
        for astat in average_stats:
            if average_stats[astat] != 0:
                if astat[:-1] in game_stats:
                    featured_stats.append({"name": astat.replace("_", " "), "avg": average_stats[astat], "value": game_stats[astat[:-1]]})
                else:
                    featured_stats.append({"name": astat.replace("_", " "), "avg": average_stats[astat], "value": game_stats[astat.replace('_per_10_min', '')]})

        if i == 0:
            data["quickplay"] = {"featured_stats": featured_stats, "game_stats": game_stats, "overall_stats": overall_stats}
        else:
            data["competitive"] = {"featured_stats": featured_stats, "game_stats": game_stats, "overall_stats": overall_stats}

    return data
Code example #10
def search(session, keyword):
    quoted_keyword = quote_plus(keyword)
    url = SEARCH_URL.format(keyword=quoted_keyword)
    con = session.get(url)

    if NO_SEARCH_RESULT_TEXT in con.text:
        raise NoSearchResultException('No search result.')

    d = pq(con.content)
    links = d(LINK_CSS)

    download_counts = d(DOWNLOAD_COUNT_CSS)
    download_counts = [parse_int(i.text_content()) for i in download_counts]

    results = []
    for link, count in zip(links, download_counts):
        title = pq(link).text()
        url = ROOT_URL + link.get('href')

        results.append(dict(title=title, url=url, download_counts=count))
    return results
Code example #11
def parse_message(msg):
    parsed_message = []
    i = 0
    last_open_str = 0

    def store_string_so_far():
        if last_open_str != i:
            parsed_message.append(stringify(msg[last_open_str:i]))

    while i < len(msg):
        if stringify(msg[i:(i+2)]) == '%d':
            store_string_so_far()
            int_bytes = msg[(i+2):(i+4)]
            parsed_int = parse_int(int_bytes)
            parsed_message.append(parsed_int)
            last_open_str = i + 4
            i = i + 4
        elif stringify(msg[i:(i+2)]) == '%f':
            store_string_so_far()
            float_bytes = msg[(i+2):(i+6)]
            parsed_float = parse_float(float_bytes)
            parsed_message.append(parsed_float)
            last_open_str = i + 6
            i = i + 6
        elif stringify(msg[i:(i+2)]) == '%l':
            store_string_so_far()
            uint32_bytes = msg[(i+2):(i+6)]
            parsed_uint32 = parse_uint32(uint32_bytes)
            parsed_message.append(parsed_uint32)
            last_open_str = i + 6
            i = i + 6
        else:
            if i+1 == len(msg):
                i += 1
                store_string_so_far()
            else:
                i += 1

    return parsed_message
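
Unlike the scrapers above, example #11 here and example #14 below treat parse_int, parse_float and parse_uint32 as binary decoders over byte slices: two bytes for %d, four for %f and %l. Assuming a little-endian wire format — the byte order is not visible in the excerpts — struct-based implementations might look like:

import struct

def parse_int(data):
    # Decode 2 bytes as a signed 16-bit integer (little-endian assumed).
    return struct.unpack('<h', bytes(data))[0]

def parse_float(data):
    # Decode 4 bytes as an IEEE-754 single-precision float (little-endian assumed).
    return struct.unpack('<f', bytes(data))[0]

def parse_uint32(data):
    # Decode 4 bytes as an unsigned 32-bit integer (little-endian assumed).
    return struct.unpack('<I', bytes(data))[0]

def stringify(data):
    # Decode raw bytes to text for the literal segments of a message.
    return bytes(data).decode('ascii', errors='replace')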
Code example #12
File: scrapper.py Project: teddylee777/snowball
def parse_fnguide(code: str):
    print('종목 {} FnGuide...'.format(code))
    url = FNGUIDE + code
    print('FnGuide {}'.format(url))
    tree = tree_from_url(url)
    
    title = first_or_none(tree.xpath('//*[@id="giName"]/text()'))
    if not title:
        return False
    
    groups = first_or_none(tree.xpath('//*[@id="compBody"]/div[1]/div[1]/p/span[1]/text()'))
    groups = groups.split(' ')
    group = groups[1] if len(groups) > 1 else None
    
    subgroup = first_or_none(tree.xpath('//*[@id="compBody"]/div[1]/div[1]/p/span[4]/text()'))
    subgroup = subgroup.replace('\xa0', '')

    closing_month = first_or_none(tree.xpath('//*[@id="compBody"]/div[1]/div[1]/p/span[6]/text()'))
    closing_month = parse_int(closing_month.split(' ')[0][:-1])

    forward_per = parse_float(first_or_none(tree.xpath('//*[@id="corp_group2"]/dl[2]/dd/text()')))
    group_per = parse_float(first_or_none(tree.xpath('//*[@id="corp_group2"]/dl[3]/dd/text()')))
    
    dividend_rate = parse_float(first_or_none(tree.xpath('//*[@id="corp_group2"]/dl[5]/dd/text()')))
    
    relative_earning_rate = parse_float(first_or_none(tree.xpath('//*[@id="svdMainChartTxt13"]/text()')))
    
    momentums = tree.xpath('//*[@id="svdMainGrid1"]/table/tbody/tr[3]/td[1]/span/text()')
    momentums = [parse_float(m) for m in momentums]

    month1 = momentums[0] if len(momentums) >= 1 else 0
    month3 = momentums[1] if len(momentums) >= 2 else 0
    month6 = momentums[2] if len(momentums) >= 3 else 0
    month12 = momentums[3] if len(momentums) >= 4 else 0
    
    foreigner_weight = parse_float(first_or_none(tree.xpath('//*[@id="svdMainGrid1"]/table/tbody/tr[3]/td[2]/text()')))

    beta = parse_float(first_or_none(tree.xpath('//*[@id="svdMainGrid1"]/table/tbody/tr[4]/td[2]/text()')))

    stocks = first_or_none(tree.xpath('//*[@id="svdMainGrid1"]/table/tbody/tr[7]/td[1]/text()'))

    stocks = stocks.split('/ ')
    has_preferred_stock = stocks[1] != '0'
    
    floating_rate = parse_float(first_or_none(tree.xpath('//*[@id="svdMainGrid1"]/table/tbody/tr[6]/td[2]/text()')))

    YoY = parse_float(first_or_none(tree.xpath('//*[@id="svdMainGrid2"]/table/tbody/tr/td[4]/span/text()')))

    consensus_point = parse_float(first_or_none(tree.xpath('//*[@id="svdMainGrid9"]/table/tbody/tr/td[1]/text()')))
    consensus_price = parse_int(first_or_none(tree.xpath('//*[@id="svdMainGrid9"]/table/tbody/tr/td[2]/text()')))
    consensus_count = parse_int(first_or_none(tree.xpath('//*[@id="svdMainGrid9"]/table/tbody/tr/td[5]/text()')))

    bps = parse_int(first_or_none(tree.xpath('//*[@id="highlight_D_A"]/table/tbody/tr[19]/td[3]/text()')))

    try:
        years = tree.xpath('//*[@id="highlight_D_Y"]/table/thead/tr[2]/th/div/text()')
        years = [x.split('/')[0] for x in years]
        last_year_index = years.index(LAST_YEAR)
    except ValueError:
        print("** 작년 데이터 없음 **")
        return

    NPs = tree.xpath('//*[@id="highlight_D_Y"]/table/tbody/tr[3]/td/text()')
    NPs = [parse_float(x) for x in NPs]

    TAs = tree.xpath('//*[@id="highlight_D_Y"]/table/tbody/tr[6]/td/text()')
    TAs = [parse_float(x) for x in TAs]

    ROEs = tree.xpath('//*[@id="highlight_D_Y"]/table/tbody/tr[17]/td/text()')
    ROEs = [parse_float(x) for x in ROEs]

    EPSs = tree.xpath('//*[@id="highlight_D_Y"]/table/tbody/tr[18]/td/text()')
    EPSs = [parse_float(x) for x in EPSs]

    BPSs = tree.xpath('//*[@id="highlight_D_Y"]/table/tbody/tr[19]/td/text()')
    BPSs = [parse_float(x) for x in BPSs]

    DPSs = tree.xpath('//*[@id="highlight_D_Y"]/table/tbody/tr[20]/td/text()')
    DPSs = [parse_float(x) for x in DPSs]

    PERs = tree.xpath('//*[@id="highlight_D_Y"]/table/tbody/tr[21]/td/text()')
    PERs = [parse_float(x) for x in PERs]

    PBRs = tree.xpath('//*[@id="highlight_D_Y"]/table/tbody/tr[22]/td/text()')
    PBRs = [parse_float(x) for x in PBRs]

    DEPTs = tree.xpath('//*[@id="highlight_D_Y"]/table/tbody/tr[7]/td/text()')
    DEPTs = [parse_float(x) for x in DEPTs]

    stock = {
        'code': code,
        'group': group,
        'subgroup': subgroup,
        'closing_month': closing_month,
        'forward_per': forward_per,
        'group_per': group_per,
        'dividend_rate': dividend_rate,
        'relative_earning_rate': relative_earning_rate,
        'month1': month1,
        'month3': month3,
        'month6': month6,
        'month12': month12,
        'foreigner_weight': foreigner_weight,
        'beta': beta,
        'has_preferred_stock': has_preferred_stock,
        'floating_rate': floating_rate,
        'YoY': YoY,
        'consensus_point': consensus_point,
        'consensus_price': consensus_price,
        'consensus_count': consensus_count,
        'bps': bps,
        'use_fnguide': True,
        'last_year_index': last_year_index,
        'NPs': NPs,
        'TAs': TAs,
        'ROEs': ROEs,
        'EPSs': EPSs,
        'BPSs': BPSs,
        'DPSs': DPSs,
        'PERs': PERs,
        'PBRs': PBRs,
        'DEPTs': DEPTs,
    }
    db.save_stock(stock)
    return True
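
The snowball scrapers (examples #1, #12, #15 and #16) also assume two tiny helpers, tree_from_url and first_or_none. Their contracts are evident from usage — fetch a page into an lxml tree; take the first item of an XPath result, or None — so a plausible sketch, with requests and lxml as assumed dependencies:

import requests
from lxml import html

def tree_from_url(url):
    # Fetch a page and parse the body into an lxml element tree.
    response = requests.get(url)
    return html.fromstring(response.text)

def first_or_none(items):
    # First element of an XPath result list, or None when the list is empty.
    return items[0] if items else None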
Code example #13
    def retrieve_injury_details(self, injury_id, location: Locazione,
                                sector: Settore):
        """
        Metodo che apre la pagina dell'infortunio specificata da injury_id.
        Questa parte è necessaria per recuperare la descrizione completa,
        più gli ID relativi ai fattori. Con questi ID possiamo poi procedere
        ad eseguire le richieste POST per ottenere i JSON specifici sui
        fattori e sul dettaglio dell'infortunio.
        """

        # Open the page for this injury
        page_response = requests.get(self.injury_page_url + str(injury_id))
        page = BeautifulSoup(page_response.text, 'lxml')

        # Fetch the JSON with the injury details. It consists of several
        # JSON objects in which most of the values are repeated (see the
        # example in assets).
        injury_details_response = requests.post(self.injury_json_url,
                                                str(injury_id))
        injury_details_response = injury_details_response.json()
        sleep(self.sleep_time)

        # Collect the h3 tags of the "Descrizione della dinamica e dei relativi fattori"
        # sections. Taking one of these tags and walking up to its parent yields the div
        # that contains all the factors for a single worker.
        factors_h3 = page.find_all(
            "h3", string=self.title_before_description_injury_page)

        accidents = []

        for (h3, o) in zip(factors_h3, injury_details_response):
            incident_description = h3.find_next_sibling().get_text().strip()

            factors_ids = [
                tag.get("onclick")[:-1].replace("apriDettagliFattore(", "")
                for tag in h3.parent.find_all("button", {
                    "onclick":
                    re.compile(r"apriDettagliFattore\([0-9]{1,6}\)")
                })
            ]
            factors = []
            for f_id in factors_ids:
                factors.append(self.retrieve_factor(f_id))
                sleep(self.sleep_time)

            worker = Lavoratore(
                sesso=o.get("sesso"),
                nazionalita=o.get("cittadinanza"),
                tipo_contratto=o.get("rapLav"),
                mansione=o.get("mansione"),
                anzianita=o.get("anzianita"),
                numero_addetti_azienda=parse_int(o.get("numAddetti")),
                attivita_prevalente_azienda=o.get("attPrev"),
            )

            # The injury state (Grave or Mortale) is derived from the days of absence:
            # by convention, an unspecified number of days means the accident was fatal;
            # otherwise it counts as serious.
            accidents.append(
                Incidente(
                    id=o.get("codiceInfortunato"),
                    lavoratore=worker,
                    fattori=factors,
                    stato_infortunio=get_injury_state(
                        o.get("numeroGiorniAssenza")),
                    descrizione_della_dinamica=incident_description,
                    luogo_infortunio=o.get("luogo"),
                    attivita_lavoratore_durante_infortunio=o.get("tipoAtt"),
                    ambiente=o.get("agente"),
                    tipo_incidente=type_incident(o.get("variazEnergia")),
                    descrizione_incidente=o.get("incidente"),
                    agente_materiale=o.get("agenteMatInc"),
                    sede_lesione=o.get("sedeLesione"),
                    natura_lesione=o.get("naturaLesione"),
                    giorni_assenza_lavoro=parse_int(
                        o.get("numeroGiorniAssenza")),
                ))

        return Infortunio(
            id=injury_id,
            settore_attivita=Settore(sector),
            locazione=Locazione(location),
            data=parse_date(injury_details_response[0].get("dataInfortunio")),
            ora_ordinale=parse_int(
                injury_details_response[0].get("oraLavoro")),
            incidenti=accidents)
Code example #14
    def handle(self, msg):
        if len(msg) == 0:
            print ('WARNING: Empty message')
            return
        msg_type = msg[0]
        if msg_type == ToComputer.DEBUG:
            # debug message
            subsystems = [
                'INFO',
                'ERROR',
                'CRON',
            ]
            if (0 > msg[1] or msg[1] >= len(subsystems)):
                print  ("WARNING: Unknown debug category: %d.." % (msg[1],))
                subsystem = 'UNKNOWN'
                print (stringify(msg[2:]))
            else:
                subsystem = subsystems[msg[1]]

            content = parse_message(msg[2:])
            content = ''.join([str(m) for m in content])
            content = '[%s] %s' % (subsystem, content)
            self.log(content)
        elif msg_type == ToComputer.GET_SETTINGS_REPLY:
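            # Wire layout of GET_SETTINGS_REPLY, as implied by the slices below
            # (field meanings inferred from the variable names, not from a spec):
            #   msg[1:5]   uint32  seconds since epoch
            #   msg[5]     byte    box UID
            #   msg[6]     char    node type
            #   msg[7:11]  uint32  box balance
            #   msg[11:23] 3x f32  state of charge, its uncertainty, battery capacity
            #   msg[23:35] 3x f32  off / red / yellow thresholds
            #   msg[35:39] 2x i16  balance update hour and minute
            #   msg[39:43] uint32  balance update amount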
            time_since_epoch_s = parse_uint32(msg[1:5])
            date = datetime.fromtimestamp(time_since_epoch_s)
            box_uid = msg[5]
            box_node_type = chr(msg[6])
            box_balance = parse_uint32(msg[7:11])
            state_of_charge       = parse_float(msg[11:15])
            uncertainty_of_charge = parse_float(msg[15:19])
            battery_capacity = parse_float(msg[19:23])

            off_threshold = parse_float(msg[23:27])
            red_threshold = parse_float(msg[27:31])
            yellow_threshold = parse_float(msg[31:35])

            balance_update_hours = parse_int(msg[35:37])
            balance_update_minutes = parse_int(msg[37:39])
            balance_update_ammount = parse_uint32(msg[39:43])

            #self.log('Time on device is ' + str(date))

            self.update_if_not_focused(self.ui_root.settings.box_time, str(date))
            self.update_if_not_focused(self.ui_root.settings.box_uid, str(box_uid))
            self.update_if_not_focused(self.ui_root.settings.box_node_type, str(box_node_type))
            self.update_if_not_focused(self.ui_root.settings.box_balance, str(box_balance))
            self.update_if_not_focused(self.ui_root.settings.state_of_charge,       str(state_of_charge))
            self.update_if_not_focused(self.ui_root.settings.uncertainty_of_charge, str(uncertainty_of_charge))
            self.update_if_not_focused(self.ui_root.settings.battery_capacity, str(battery_capacity))

            self.update_if_not_focused(self.ui_root.settings.off_threshold, str(off_threshold)[:6])
            self.update_if_not_focused(self.ui_root.settings.red_threshold, str(red_threshold)[:6])
            self.update_if_not_focused(self.ui_root.settings.yellow_threshold, str(yellow_threshold)[:6])

            self.update_if_not_focused(self.ui_root.settings.balance_update_hours, str(balance_update_hours))
            self.update_if_not_focused(self.ui_root.settings.balance_update_minutes, str(balance_update_minutes))
            self.update_if_not_focused(self.ui_root.settings.balance_update_ammount, str(balance_update_ammount))

        elif msg_type == ToComputer.DATA_LOGGER_REPLY:
            controller.get.data_logger.on_message(msg)
        else:
            print('WARNING: Unknown message type:', msg[0])
Code example #15
File: scrapper.py Project: gerald-kim/snowball
def parse_snowball(code):
    if not parse_basic(code):
        print('수집 실패')
        return

    if not parse_fnguide(code):
        print('FnGuide 수집실패')
        if not parse_naver_company(code):
            return

    print('종목 {} 스노우볼...'.format(code))
    url = NAVER_YEARLY % (code)
    tree = tree_from_url(url)

    try:
        years = list(
            filter(
                lambda x: x != '',
                map(lambda x: x.strip().split('/')[0],
                    tree.xpath('/html/body/table/thead/tr[2]/th/text()'))))
        last_year_index = years.index(LAST_YEAR)
    except ValueError:
        return

    tds = tree.xpath('/html/body/table/tbody/tr[22]/td')

    ROEs = [first_or_none(td.xpath('span/text()')) for td in tds]
    while ROEs and ROEs[-1] is None:
        ROEs.pop()

    if len(ROEs) == 0:
        print('*** ROE 정보가 없음 >>>')
        return

    CAPEXs = tree.xpath('/html/body/table/tbody/tr[17]/td/span/text()')
    CAPEXs = [parse_float(x) for x in CAPEXs]

    ROEs = [float_or_none(x) for x in ROEs]

    DEPTs = tree.xpath('/html/body/table/tbody/tr[24]/td/span/text()')
    DEPTs = [parse_float(x) for x in DEPTs]

    EPSs = tree.xpath('/html/body/table/tbody/tr[26]/td/span/text()')
    EPSs = [parse_float(x) for x in EPSs]

    PERs = tree.xpath('/html/body/table/tbody/tr[27]/td/span/text()')
    PERs = [parse_float(x) for x in PERs]

    BPSs = tree.xpath('/html/body/table/tbody/tr[28]/td/span/text()')
    BPSs = [parse_int(x) for x in BPSs]

    PBRs = tree.xpath('/html/body/table/tbody/tr[29]/td/span/text()')
    PBRs = [parse_float(x) for x in PBRs]

    # 자산총계 (total assets)
    TAs = tree.xpath('/html/body/table/tbody/tr[8]/td/span/text()')
    TAs = [parse_int(x) for x in TAs]

    # 당기순이익 (net income)
    NPs = tree.xpath('/html/body/table/tbody/tr[5]/td/span/text()')
    NPs = [parse_int(x) for x in NPs]

    # 영업활동현금흐름 (operating cash flow)
    CFOs = tree.xpath('/html/body/table/tbody/tr[14]/td/span/text()')
    CFOs = [parse_int(x) for x in CFOs]

    # 발행주식수 (shares outstanding)
    TIs = tree.xpath('/html/body/table/tbody/tr[33]/td/span/text()')
    TIs = [parse_int(x) for x in TIs]

    stock = {
        'code': code,
        'ROEs': ROEs,
        'last_year_index': last_year_index,
        'PBRs': PBRs,
        'EPSs': EPSs,
        'TAs': TAs,
        'NPs': NPs,
        'CFOs': CFOs,
        'PERs': PERs,
        'TIs': TIs,
        'DEPTs': DEPTs,
        'BPSs': BPSs,
        'CAPEXs': CAPEXs,
    }
    stock = db.save_stock(stock)
    stock.save_record()

    parse_quarterly(code)
    parse_json(code)
Code example #16
File: scrapper.py Project: gerald-kim/snowball
def parse_fnguide(code: str):
    print('종목 {} FnGuide...'.format(code))
    url = FNGUIDE + code
    print('FnGuide {}'.format(url))
    tree = tree_from_url(url)

    title = first_or_none(tree.xpath('//*[@id="giName"]/text()'))
    if not title:
        return False

    groups = first_or_none(
        tree.xpath('//*[@id="compBody"]/div[1]/div[1]/p/span[1]/text()'))
    groups = groups.split(' ')
    group = groups[1] if len(groups) > 1 else None

    subgroup = first_or_none(
        tree.xpath('//*[@id="compBody"]/div[1]/div[1]/p/span[4]/text()'))
    subgroup = subgroup.replace('\xa0', '')

    closing_month = first_or_none(
        tree.xpath('//*[@id="compBody"]/div[1]/div[1]/p/span[6]/text()'))
    closing_month = parse_int(closing_month.split(' ')[0][:-1])

    forward_per = parse_float(
        first_or_none(tree.xpath('//*[@id="corp_group2"]/dl[2]/dd/text()')))
    group_per = parse_float(
        first_or_none(tree.xpath('//*[@id="corp_group2"]/dl[3]/dd/text()')))

    dividend_rate = parse_float(
        first_or_none(tree.xpath('//*[@id="corp_group2"]/dl[5]/dd/text()')))

    relative_earning_rate = parse_float(
        first_or_none(tree.xpath('//*[@id="svdMainChartTxt13"]/text()')))

    momentums = tree.xpath(
        '//*[@id="svdMainGrid1"]/table/tbody/tr[3]/td[1]/span/text()')
    momentums = [parse_float(m) for m in momentums]

    month1 = momentums[0] if len(momentums) >= 1 else 0
    month3 = momentums[1] if len(momentums) >= 2 else 0
    month6 = momentums[2] if len(momentums) >= 3 else 0
    month12 = momentums[3] if len(momentums) >= 4 else 0

    foreigner_weight = parse_float(
        first_or_none(
            tree.xpath(
                '//*[@id="svdMainGrid1"]/table/tbody/tr[3]/td[2]/text()')))

    beta = parse_float(
        first_or_none(
            tree.xpath(
                '//*[@id="svdMainGrid1"]/table/tbody/tr[4]/td[2]/text()')))

    stocks = first_or_none(
        tree.xpath('//*[@id="svdMainGrid1"]/table/tbody/tr[5]/td[1]/text()'))
    stocks = stocks.split('/ ')
    has_preferred_stock = stocks[1] != '0'

    floating_rate = parse_float(
        first_or_none(
            tree.xpath(
                '//*[@id="svdMainGrid1"]/table/tbody/tr[6]/td[2]/text()')))

    YoY = parse_float(
        first_or_none(
            tree.xpath(
                '//*[@id="svdMainGrid2"]/table/tbody/tr/td[4]/span/text()')))

    consensus_point = parse_float(
        first_or_none(
            tree.xpath('//*[@id="svdMainGrid9"]/table/tbody/tr/td[1]/text()')))
    consensus_price = parse_int(
        first_or_none(
            tree.xpath('//*[@id="svdMainGrid9"]/table/tbody/tr/td[2]/text()')))
    consensus_count = parse_int(
        first_or_none(
            tree.xpath('//*[@id="svdMainGrid9"]/table/tbody/tr/td[5]/text()')))

    bps = parse_int(
        first_or_none(
            tree.xpath(
                '//*[@id="highlight_D_A"]/table/tbody/tr[19]/td[3]/text()')))

    stock = {
        'code': code,
        'group': group,
        'subgroup': subgroup,
        'closing_month': closing_month,
        'forward_per': forward_per,
        'group_per': group_per,
        'dividend_rate': dividend_rate,
        'relative_earning_rate': relative_earning_rate,
        'month1': month1,
        'month3': month3,
        'month6': month6,
        'month12': month12,
        'foreigner_weight': foreigner_weight,
        'beta': beta,
        'has_preferred_stock': has_preferred_stock,
        'floating_rate': floating_rate,
        'YoY': YoY,
        'consensus_point': consensus_point,
        'consensus_price': consensus_price,
        'consensus_count': consensus_count,
        'bps': bps,
        'use_fnguide': True,
    }
    db.save_stock(stock)
    return True
Code example #17
    def search_by_employee(self):

        while_breaker = 1

        while True:

            # This helps the testing, since these are kind of
            # infinite loops while testing, there's need for something
            # that can break them automatically
            if while_breaker > 100:
                break
            else:
                while_breaker += 1

            utils.cls()
            name = utils.get_input('Enter the name of the Employee: ')

            if not name:
                utils.pause('Error: Blank spaces are not allowed')
                continue
            else:
                break

        employees = Employee.select().where(Employee.name.contains(name))

        # One list to collect the matching Employee objects
        employees_list = []

        # And another to display a selection menu
        employees_menu = []

        if len(employees) > 0:
            counter = 1

            for employee in employees:
                employees_list.append(employee)
                employees_menu.append("{}) {}".format(counter, employee.name))
                counter += 1

        while_breaker = 1

        while True:

            # This helps the testing, since these are kind of
            # infinite loops while testing, there's need for something
            # that can break them automatically
            if while_breaker > 100:
                break
            else:
                while_breaker += 1

            utils.cls()
            print("Select one Employee: \n" + "\n".join(employees_menu))
            option = utils.parse_int(utils.get_input('>'))

            if option:
                if option not in range(len(employees) + 1) and option != 0:
                    utils.pause("Error: {} isn't a valid option".format(option))
                    continue
            else:
                utils.pause("Error: that input is not a number")
                continue

            try:
                employee = employees_list[option - 1]
            except IndexError:
                continue

            break

        tasks = (Task.select(Employee.name).join(Employee).where(
            Task.id_employee == employee.id_employee).naive())

        return tasks
Code example #18
    def retrieve_injury_details(self, injury_id, **details):
        """
        Metodo che apre la pagina dell'infortunio specificata da injury_id.
        Questa parte è necessaria per recuperare la descrizione completa,
        più gli ID relativi ai fattori. Con questi ID possiamo poi procedere
        ad eseguire le richieste POST per ottenere i JSON specifici sui
        fattori e sul dettaglio dell'infortunio.

        In details sono contenute:
            - Locazione
            - Settore
            - StatoInfortunio
        """

        # Open the page for this injury
        self.driver.get(self.injury_page_url + str(injury_id))
        page = BeautifulSoup(self.driver.page_source, 'lxml')

        injury_description = \
            page.find("h3", string=self.title_before_description_injury_page) \
                .find_next_sibling() \
                .get_text() \
                .strip()

        factors_ids = [
            tag.get("onclick")[:-1].replace("apriDettagliFattore(", "")
            for tag in page.find_all(
                "button",
                {"onclick": re.compile("apriDettagliFattore\([0-9]{1,6}\)")})
        ]

        factors = []
        for f_id in factors_ids:
            factors.append(self.retrieve_factor(f_id))
            sleep(self.sleep_between_requests)

        response = requests.post(self.injury_json_url, injury_id)
        response = response.json()

        # The JSON consists of several JSON objects in which most of the
        # values are repeated (see the example in assets).

        workers = []
        for o in response:
            workers.append(
                Lavoratore(
                    lavoratore_id=o.get("codiceInfortunato"),
                    sesso=o.get("sesso"),
                    nazionalita=o.get("cittadinanza"),
                    tipo_contratto=o.get("rapLav"),
                    mansione=o.get("mansione"),
                    anzianita=o.get("anzianita"),
                    numero_addetti_azienda=parse_int(o.get("numAddetti")),
                    attivita_prevalente_azienda=o.get("attPrev"),
                    sede_lesione=o.get("sedeLesione"),
                    natura_lesione=o.get("naturaLesione"),
                    giorni_assenza_lavoro=parse_int(
                        o.get("numeroGiorniAssenza")),
                    luogo_infortunio=o.get("luogo"),
                    attivita_lavoratore_durante_infortunio=o.get("tipoAtt"),
                    ambiente_infortunio=o.get("agente"),
                    tipo_incidente=o.get("variazEnergia"),
                    incidente=o.get("incidente"),
                    agente_materiale_incidente=o.get("agenteMatInc")))

        return Infortunio(
            id=injury_id,
            stato=StatoInfortunio(details.get("StatoInfortunio")),
            settore_attivita=Settore(details.get("Settore")),
            locazione=Locazione(details.get("Locazione")),
            descrizione=injury_description,
            data=parse_date(response[0].get("dataInfortunio")),
            ora_ordinale=parse_int(response[0].get("oraLavoro")),
            fattori=factors,
            lavoratori=workers,
        )
Code example #19
    def get_item(self, response):
        title = response.css('#productTitle::text').extract_first()
        title = title.strip() if title is not None else None
        # print(title)

        details_output = dict()
        ASIN = None

        details = response.css('#detail-bullets .content > ul > li')

        for detail in details:
            detail_name = detail.css('b::text').extract_first()
            detail_name = detail_name.replace(':', '').strip()

            detail = BeautifulSoup(detail.extract(), 'lxml')

            # Remove detail name's tag in each detail
            for span in detail.find_all('b'):
                span.extract()
            detail = Selector(text=str(detail))

            detail_values = detail.css('li ::text').extract()
            detail_values = utils.normalize_str_array(detail_values)

            detail_value = detail_values[0] if len(detail_values) > 0 else None

            # Parse ranks number
            if 'Amazon Best Sellers Rank' in detail_name:
                detail_value = detail_value.strip().split(' ')[0]
                detail_value = utils.parse_int(detail_value)

            if 'ASIN' in detail_name:
                ASIN = detail_value

            details_output[detail_name] = detail_value

        alt_images = response.css('#altImages img::attr(src)').extract()
        # print(alt_images)

        brand = response.css('#bylineInfo::text').extract_first()
        # print(brand)

        brand_url = response.css('#bylineInfo::attr(href)').extract_first()
        brand_url = response.urljoin(
            brand_url) if brand_url is not None else None
        # print(brand_url)

        price = response.css(
            '.snsPriceBlock .a-color-price::text').extract_first()
        if price is None:
            price = response.css('#priceblock_ourprice::text').extract_first()

        price = price.strip() if price is not None else None
        # print(price)

        description = response.css(
            '#productDescription p::text, #productDescription h3::text'
        ).extract()
        description = utils.normalize_str_array(description)
        # description = '\n'.join(description)
        # print(description)

        plus_desc = response.css('#aplus')
        plus_desc_html = plus_desc.css('.aplus-v2').extract_first()

        plus_desc_texts = plus_desc.css(
            '*:not(script):not(style)::text').extract()
        plus_desc_texts = utils.normalize_str_array(plus_desc_texts)
        plus_desc_text = '\n'.join(plus_desc_texts)

        features = response.css('#feature-bullets ul li ::text').extract()
        features = [feature.strip() for feature in features]
        # print(features)

        videos = response.css(
            '#vse-rel-videos-carousel .vse-video-item::attr(data-video-url)'
        ).extract()
        # print(videos)

        return {
            'ASIN': ASIN,
            'url': response.url,
            'title': title,
            'brand': {
                'name': brand,
                'url': brand_url
            },
            'alt_images': alt_images,
            'details': details_output,
            'price': price,
            'description': description,
            'plus_description': {
                'text': plus_desc_text,
                'html': plus_desc_html
            },
            'features': features,
            'videos': videos,
        }
Code example #20
File: parsers.py Project: Js41637/Overwatch-API
def parse_hero(parsed):
    """Goes through every hero and parses their game stats"""
    heroes = {}
    for key, hero in datastore.heroes.items():
        stats = parsed.xpath(".//div[@data-group-id='stats' and @data-category-id='{0}']".format(hero["id"]))
        heroes[key] = {
            "name": hero["name"],
            "class": hero["class"],
            "stats": {
                "quickplay": {"featured_stats": [], "general_stats": {}, "overall_stats": {}, "hero_stats": {}},
                "competitive": {"featured_stats": [], "general_stats": {}, "overall_stats": {}, "hero_stats": {}}
            }
        }

        if len(stats) == 0:
            heroes[key]["stats"]["quickplay"]["is_empty"] = True
            heroes[key]["stats"]["competitive"]["is_empty"] = True
            continue

        if len(stats) == 1:
            heroes[key]["stats"]["competitive"]["is_empty"] = True

        # Go through both QuickPlay and Competitive stats
        for stats_index, item in enumerate(stats):
            hero_stats, general_stats, overall_stats, average_stats, featured_stats = {}, {}, {}, {}, []

            try:
                hbtitle = item[0].find(".//h5[@class='stat-title']").text
            except AttributeError:
                hbtitle = item[0].find(".//span[@class='stat-title']").text

            if hbtitle == 'Hero Specific':
                hero_box = item[0]
                starting_pos = 1
            else:
                hero_box = None
                starting_pos = 0

            # Fetch Hero Specific Stats
            if hero_box is not None:
                for hstat in hero_box.findall(".//tbody/tr"):
                    name, value = hstat[0].text.lower().replace(" ", "_").replace("_-_", "_"), hstat[1].text
                    amount = utils.parse_int(value, False)
                    if '_avg' in name.lower():
                        # Don't include average stats in the general_stats, use them for the featured stats section
                        average_stats[name.replace("_avg", "")] = amount
                    else:
                        hero_stats[name] = amount

            # Fetch General Hero Stats
            for subbox in item[starting_pos:]:
                stats = subbox.findall(".//tbody/tr")
                for stat in stats:
                    name, value = stat[0].text.lower().replace(" ", "_").replace("_-_", "_"), stat[1].text
                    amount = utils.parse_int(value, False)
                    if '_avg' in name.lower():
                        # Don't include average stats in the general_stats, use them for the featured stats section
                        average_stats[name.replace("_avg", "")] = amount
                    else:
                        general_stats[name] = amount

            overall_stats = parse_overall_stats(general_stats, average_stats)

            # Manually add KPD
            if 'eliminations' in general_stats and 'deaths' in general_stats:
                general_stats["kpd"] = round(general_stats["eliminations"] / general_stats["deaths"], 2)
            else:
                general_stats["kpd"] = None

            # Generate Featured Stats
            for astat in average_stats:
                if astat in general_stats or astat in hero_stats:
                    if astat in hero_stats:
                        val = hero_stats[astat]
                    else:
                        val = general_stats[astat]
                    featured_stats.append({"name": astat.replace("_", " "), "avg": average_stats[astat], "value": val})

            if stats_index == 0:
                heroes[key]["stats"]["quickplay"] = {
                    "featured_stats": featured_stats, "general_stats": general_stats, "overall_stats": overall_stats, "hero_stats": hero_stats
                }
            else:
                heroes[key]["stats"]["competitive"] = {
                    "featured_stats": featured_stats, "general_stats": general_stats, "overall_stats": overall_stats, "hero_stats": hero_stats
                }

    return heroes
Code example #21
def challenge():
    """Request and submit challenges
    """
    # Reject if not authenticated
    if 'username' not in session:
        return redirect('/login')

    if request.method == 'GET':
        # Display challenge
        # Request challenge if no current problem
        if 'problem' not in session or session['problem'] is None:
            method = random.choice(list(SchedulingMethod))
            problem = Problem.generate(method, n_processes=3)
            session['problem'] = problem.to_json()
        else:
            problem = Problem.from_json(session['problem'])

        u = User.query.filter_by(username=session['username']).first()

        return render_template('challenge.html',
                               username=session['username'],
                               problem=problem,
                               score=u.score)

    else:
        # Submit challenge
        # Request new problem if no current problem
        if 'problem' not in session or session['problem'] is None:
            return redirect('/challenge')

        problem = Problem.from_json(session['problem'])

        # Solve problem
        solver = Solver(problem)
        ans = solver.solve()

        # Parse and validate input
        is_correct = True
        for i, (finish_t, wait_t) in enumerate(ans):
            finish_t_guess = parse_int(request.form[f'finish_{i}'])
            if finish_t_guess != finish_t:
                is_correct = False
                break

            wait_t_guess = parse_int(request.form[f'wait_{i}'])
            if wait_t_guess != wait_t:
                is_correct = False
                break

        # Award points
        # Correct, 1 point
        # Incorrect, 0 points
        u = User.query.filter_by(username=session['username']).first()
        if is_correct:
            u.score += 1
            db.session.commit()

        # Reset problem
        session['problem'] = None

        return render_template(
            'challenge_done.html',
            username=session['username'],
            problem=problem,
            is_correct=is_correct,
            answer_times=ans,
            score=u.score,
        )