def extractData(results):
    """Parse scraped house-listing elements into a list of detail dicts.

    Each element's ``.text`` is split on newlines and pattern-matched for
    known field labels (MLS, Asking Price, Strata Fee, ...). Cards that
    raise during parsing are skipped.

    Args:
        results: iterable of elements exposing a ``.text`` attribute
            (e.g. BeautifulSoup/Selenium nodes), one per house card.

    Returns:
        list[dict]: one dict per successfully parsed card.
    """
    houseArray = []
    for column in results:
        try:
            houseDict = {}
            result = column.text.split("\n")
            print(result)
            # Per-card invariants: first line is the address, last line is
            # the free-text blurb. (Hoisted out of the per-line loop, which
            # recomputed them on every iteration.)
            houseDict["address"] = result[0]
            houseDict["blurb"] = result[len(result) - 1:]
            # The area line's position depends on whether a strata notice
            # is present on the card.
            if 'Show info on strata building' in result[2]:
                houseDict["area"] = result[3]
            elif "$" in result[2]:
                houseDict["area"] = result[1]
            for line in result[:-1]:
                if "MLS" in line:
                    houseDict["MLS"] = line
                if " • " in line:
                    # "<style> • <storeys>", e.g. "House • 2 Storey".
                    # ("style" instead of the original "type", which
                    # shadowed the builtin.)
                    style, storeys = line.split(" • ")
                    houseDict["style"] = style
                    houseDict["storeys"] = storeys
                if '     ' in line:
                    # Wide-gap line: first char is beds, last char is baths.
                    houseDict["beds"] = line[:1]
                    houseDict["baths"] = line[-1:]
                if "Asking Price" in line:
                    houseDict["price"] = float(price_str(line))
                if "Assessed Value" in line:
                    houseDict["assessed"] = line[len("Assessed Value"):]
                if "Size of House" in line:
                    houseDict["size"] = float(price_str(line))
                if "Strata Fee" in line:
                    houseDict["strata"] = float(price_str(line))
                if "Property Taxes" in line:
                    houseDict["tax"] = line[len("Property Taxes"):]
                if "Ownership floaterest" in line:
                    houseDict["ownership"] = line[len("Ownership floaterest"):]
                if "Age of House" in line:
                    houseDict["age"] = line[len("Age of House"):]
                if "Basement" in line:
                    houseDict["basement"] = line[len("Basement"):]
            houseArray.append(houseDict)
        except Exception:
            # Skip malformed cards. The original caught BaseException,
            # which also swallows KeyboardInterrupt/SystemExit.
            pass
    return houseArray
Exemple #2
0
def extract_total_from_invoice_text(invoice_text):
    """Heuristically extract the invoice total from raw document text.

    A document is treated as an invoice when it contains a currency symbol
    (module-level ``currency_signs``) and more than two currency amounts;
    the largest amount found is assumed to be the total.

    Args:
        invoice_text: full plain text of the document.

    Returns:
        float total when the text qualifies as an invoice, otherwise 0.
    """
    # determine if text might qualify as invoice by looking for currency symbols
    if any(cs in invoice_text for cs in currency_signs):
        prices = []
        for line in invoice_text.splitlines():
            # determine if a number is present in the line
            if any(char.isdigit() for char in line):
                # check for currency symbols, eliminating dates or miscellaneous numbers
                if any(cs in line for cs in currency_signs):
                    # remove alphabetical characters; note [a-zA-Z] — the
                    # original [a-zA-z] range also matched "[\]^_`"
                    price_string = re.sub('[a-zA-Z]', '', line)
                    # convert string to float price value
                    price = float(price_str(price_string))
                    prices.append(price)
        # if more than two amounts of currency have been detected in the documents text,
        # we will assume that the document can be classified as an invoice.
        # obviously this is a pretty rough mechanism and it may be prone to accept letters
        # or e-mails as invoices.
        if len(prices) > 2:
            total = max(prices)
            print(f'determined invoice with total: {total}')
            return total
        # Currency signs present but too few amounts: not an invoice either.
        # (The original fell through and implicitly returned None here.)
        return 0
    else:
        print(
            "no currency sign detected, assuming the document is not an invoice"
        )
        return 0
Exemple #3
0
    def parse_item(self, response):
        """Scrape one Scion product page and yield a normalized item dict."""
        print('\x1b[1;30;43m' + 'RUNNING-ITEM' + '\x1b[0m')
        print('\x1b[1;35;40m' + response.request.url + '\x1b[0m')

        # Pull the raw fields out of the page.
        title = response.xpath('//h1[@class="product_title entry-title"]/text()').extract_first()
        raw_price = response.xpath('//div[@class="product-price"]/p/span/text()').extract_first()
        price = price_str(raw_price)
        description = response.xpath('//div[@class="product_meta"]/span/a/text()').extract()
        img = response.xpath('//*[@id="product-image"]/div/@data-thumb').extract_first()
        url = response.request.url
        location = 'Malabe, Moratuwa, online'

        # Debug dump of every field, same order and escapes as before.
        print('\x1b[6;30;42m' + title + '\x1b[0m')
        for field in (price, description, img, url):
            print('\x1b[6;30;42m' + str(field) + '\x1b[0m')

        yield {
            'title': title,
            'price': float(price),
            'description': description,
            'img': img,
            'url': url,
            'location': location,
            'store': "Scion",
            'condition': "New"
        }
def parse_barclays(msgli):
    """Build a DataFrame of Barclays transactions from Gmail message dicts.

    Args:
        msgli: list of Gmail API message dicts (with 'payload' headers and
            a 'snippet').

    Returns:
        pandas.DataFrame with Date, Description, Amount and Source columns.
        Barclays alerts carry no merchant info, so Description is 'n/a'.
    """
    # (The original also kept an unused ``descli`` list — removed.)
    dateli = []
    amtli = []

    for msg in msgli:
        # Transaction date comes from the message's Date header.
        # NOTE(review): amounts are appended once per message but dates once
        # per Date header — a message without (or with multiple) Date headers
        # would misalign the columns; confirm upstream guarantees one header.
        headerli = msg['payload']['headers']
        for nestdic in headerli:
            if nestdic['name'] == 'Date':
                date = parser.parse(nestdic['value']).date()
                dateli.append(date)

        # The amount follows the word "purchase" in the snippet.
        snippet = msg['snippet']
        amount_index = snippet.find("purchase")
        amt_string = snippet[amount_index:]
        amtli.append(price_str(amt_string))

    barclays_df = pd.DataFrame(
        data={
            'Date': dateli,
            'Description': ['n/a'] * len(dateli),
            'Amount': amtli,
            'Source': ['Barclays'] * len(dateli)
        })

    return barclays_df
def parse_citi(msgli):
    """Build a DataFrame of Citi Double Cash transactions from Gmail messages.

    Handles two alert wordings: the original ("Account #: XXXX ...") and the
    newer "Citi Alert:" style, selected per message via the ``og`` flag.

    Args:
        msgli: list of Gmail API message dicts fetched with format='raw'.

    Returns:
        pandas.DataFrame with Date, Description, Amount and Source columns.
    """
    import base64
    dateli = []  # parsed transaction dates
    descli = []  # merchant/description strings
    amtli = []   # amount strings as returned by price_str

    for msg in msgli:
        # decode raw base64url encoded string to bytes
        # (padding restored first, since Gmail strips trailing '=')
        decoded = base64.urlsafe_b64decode(msg['raw'] + '=' *
                                           (4 - len(msg['raw']) % 4))

        # search for relevant snippet with info
        og = True  # assume the original alert format until proven otherwise

        start_index = decoded.find(b"Account #: XXXX")
        end_index = decoded.find(b"exceeds the $0.00 transaction amount")

        if start_index == -1:
            # Newer "Citi Alert" wording; its date is pulled separately from
            # just after the sender-address marker in the raw bytes.
            og = False
            start_index = decoded.find(b"Citi Alert:")
            end_index = decoded.find(b"on card ending in")
            date_start_index = decoded.find(b"<*****@*****.**>;")

        snippet = decoded[start_index:end_index]
        snippet = snippet.decode('utf-8')  # convert snippet to string

        # amount, description, date
        if og == True:
            # Character offsets below (+20, +2, +13) are tuned to the fixed
            # wording of the alert e-mail.
            amount_index = snippet.find("Account #: XXXX")
            where_index = snippet.find("at")
            date_index = snippet.find("on")

            amt_string = snippet[amount_index + 20:where_index]
            where_string = snippet[where_index + 2:date_index]
            date_string = snippet[date_index + 2:date_index + 13]

        if og == False:
            amount_index = snippet.find("A ")
            where_index = snippet.find("at")

            amt_string = snippet[amount_index:where_index]
            where_string = snippet[where_index + 2:end_index]

            # Date lives outside the snippet; slice it from the raw bytes.
            # NOTE(review): the 26/43 offsets assume a fixed header layout —
            # confirm against a live message.
            date_string = decoded[date_start_index + 26:date_start_index + 43]
            date_string = date_string.decode('utf-8')

        amtli.append(price_str(amt_string))
        descli.append(where_string)
        date = parser.parse(date_string).date()
        dateli.append(date)

    citi_df = pd.DataFrame(
        data={
            'Date': dateli,
            'Description': descli,
            'Amount': amtli,
            'Source': ['Citi Double Cash'] * len(dateli)
        })

    return citi_df
Exemple #6
0
 def parse_item(self, response):
     """Scrape a ClicknShop product page and yield a normalized item dict."""
     print('\x1b[1;30;43m' + 'RUNNING-ITEM' + '\x1b[0m')
     print('\x1b[1;35;40m' + response.request.url + '\x1b[0m')
     title = response.xpath('//span[@class="nameCont"]/text()').extract_first()
     # Index 1: skip the first matched price node.
     price = price_str(response.xpath('//span[@class="price"]/text()').extract()[1])
     # Row 10 of the spec table doubles as the description; normalize the
     # unicode form and collapse whitespace runs to single spaces.
     spec_text = ''.join(list(response.xpath('//*[@id="product-attribute-specs-table"]/tbody/tr[10]/td/text()').extract()))
     description = re.sub('\s+', ' ', unicodedata.normalize("NFKD", spec_text)).strip()
     img = response.xpath('//*[@id="image-0"]/@src').extract_first()
     url = response.url
     location = 'online'
     # Debug dump, same order and escapes as before.
     print('\x1b[6;30;42m' + title + '\x1b[0m')
     for field in (price, description):
         print('\x1b[6;30;42m' + str(field) + '\x1b[0m')
     print('\x1b[6;30;42m' + img + '\x1b[0m')
     print('\x1b[6;30;42m' + url + '\x1b[0m')
     print('\x1b[6;30;42m' + location + '\x1b[0m')
     yield {
         'title': title,
         'price': float(price),
         'description': description,
         'img': img,
         'url': url,
         'location': location,
         'store': "ClicknShop",
         'condition': "New"
     }
Exemple #7
0
    def parse_item(self, response):
        """Scrape a Buyabans product page and yield a normalized item dict."""
        print('\x1b[1;30;43m' + 'RUNNING-ITEM' + '\x1b[0m')
        print('\x1b[1;35;40m' + response.request.url + '\x1b[0m')

        title = response.xpath('//*[@id="product"]/div/div[2]/h1/text()').extract_first()
        raw_price = response.xpath('//*[@id="item_price"]/text()').extract_first()
        price = price_str(raw_price)
        # Join every text node in the detail pane, normalize the unicode
        # form, then collapse whitespace runs to single spaces.
        detail_text = ''.join(response.xpath('//*[@id="product-detail"]/*//text()').extract())
        description = re.sub('\s+', ' ', unicodedata.normalize("NFKD", detail_text)).strip()
        img = response.xpath('//*[@id="zoom_03"]/@src').extract_first()
        url = response.url
        location = 'online'

        # Debug dump, same order and escapes as before.
        print('\x1b[6;30;42m' + title + '\x1b[0m')
        for field in (price, description, img, url):
            print('\x1b[6;30;42m' + str(field) + '\x1b[0m')

        yield {
            'title': title,
            'price': float(price),
            'description': description,
            'img': img,
            'url': url,
            'location': location,
            'store': "Buyabans",
            'condition': "New"
        }
Exemple #8
0
 def parse_item(self, response):
     """Scrape a product page, falling back to the "-no-special" price node."""
     title = response.xpath('//h1[@class="title"]/text()').extract_first()
     price = response.xpath('//span[@class="price"]/span[@dir="ltr"]/@data-price').extract_first()
     if str(price) == 'None':
         # Regular price node missing; try the no-special variant and flag
         # the fallback with a yellow (43) background in the debug dump.
         price = response.xpath('//span[@class="price -no-special"]/span[@dir="ltr"]/@data-price').extract_first()
         print('\x1b[6;30;43m' + str(price) + '\x1b[0m')
     else:
         print('\x1b[6;30;42m' + str(price) + '\x1b[0m')
     url = response.url
     location = 'online'
     # Debug dump, same order and escapes as before.
     print('\x1b[6;30;42m' + str(title) + '\x1b[0m')
     print('\x1b[6;30;42m' + str(price) + '\x1b[0m')
     print('\x1b[6;30;42m' + str(url) + '\x1b[0m')
     yield {
         'title': title,
         'price': float(price_str(price)),
         'url': url,
         'location': location
     }
Exemple #9
0
def parse_html(source):
    """Parse a URE search-results page into MLS listing records.

    Args:
        source: raw HTML of the results page.

    Returns:
        (expected_count, mls_listings): the result count advertised by the
        page and the list of shared.MLS records actually scraped.

    Raises:
        ValueError: when no property cards are found.
    """
    mls_listings = []
    soup = BeautifulSoup(source, 'html.parser')
    count = 0
    # The page advertises how many results it holds; return it so the
    # caller can cross-check against what was actually parsed.
    results_count_el = soup.select_one(
        shared.PARAMS['selector']['results_count_ure'])
    expected_count = extract_results_count(results_count_el.text)
    for property_card in soup.select(
            shared.PARAMS['selector']['property_card']):
        # Skip nodes matched by the selector that lack the listing class.
        if not ('class' in property_card.attrs
                and shared.PARAMS['selector']['property_card_class']
                in property_card.attrs['class']):
            continue
        count += 1
        mls = int(property_card.attrs['listno'])
        # Open-house banner is optional; collapse its whitespace if present.
        openhouse_label_el = property_card.select_one(
            shared.PARAMS['selector']['openhouse_label'])
        if openhouse_label_el:
            open_house = openhouse_label_el.text
            open_house = " ".join(open_house.split())
        else:
            open_house = ''
        property_details = property_card.select(
            shared.PARAMS['selector']['property_details'])[0]
        # NOTE(review): contents[2] assumes a fixed child layout of the
        # status node — confirm if the site markup changes.
        status = property_details.select_one(
            shared.PARAMS['selector']['status']).contents[2].strip()
        listing_details_el = property_details.select_one(
            shared.PARAMS['selector']['listing_details'])
        list_price_el = listing_details_el.select_one(
            shared.PARAMS['selector']['list_price'])
        list_price_str = list_price_el.text
        list_price = int(money_parser.price_str(list_price_str))
        # Beds/baths/sqft live in one text blob shaped like
        # "<n> bds ... <n> ba ... <n> SqFt." per the regexes below.
        details_str = listing_details_el.contents[2].strip()
        bedrooms = int(re.match(r'^(\d+) bds .*', details_str).group(1))
        bathrooms = int(re.match(r'.* (\d+) ba .*', details_str).group(1))
        sqft = int(re.match(r'.* (\d+) SqFt\.$', details_str).group(1))
        address = property_details.select_one(
            shared.PARAMS['selector']['address']).text.strip()
        address = " ".join(address.split())
        listing_agent = property_details.select_one(
            shared.PARAMS['selector']['listing_agent']).text.strip()
        image_url = property_card.select_one(
            shared.PARAMS['selector']['listing_img_ure']).attrs['src']
        # ['mls', 'address', 'price', 'status', 'bedrooms', 'bathrooms', 'sqft', 'agent', 'open_house', 'source',
        # 'image_url']
        listing = shared.MLS(mls, address, list_price, status, bedrooms,
                             bathrooms, sqft, listing_agent, open_house,
                             shared.SOURCE_URE, image_url)
        validate_listing(listing)
        mls_listings.append(listing)
    if count == 0:
        raise ValueError('No listings found')
    return expected_count, mls_listings
Exemple #10
0
def find_dollar_amount(given_text):
    """Extract a dollar amount from free text.

    Tries ``price_str`` first; on a ValueError falls back to a ``$X.XX``
    regex. Returns ``False`` when no amount can be extracted.

    Args:
        given_text: arbitrary text possibly containing a price.

    Returns:
        The extracted amount string, or False on failure.
    """
    try:
        dollar_amount = price_str(given_text)
    except ValueError:
        # price_str could not parse it; fall back to a literal $X.XX scan.
        try:
            dollar_amount = re.findall(r"[$](\d+(?:\.\d{2})?)", given_text)[0]
        except Exception:
            # Narrowed from a bare except:, which also caught
            # KeyboardInterrupt/SystemExit.
            dollar_amount = False
    except Exception:
        dollar_amount = False

    return dollar_amount
Exemple #11
0
 async def do_request(session_rico, url):
     """POST to *url* and return price strings parsed from each <b> tag.

     Tags whose text is empty or cannot be parsed by price_str are skipped.
     """
     payload = {}
     prices = []
     html = session_rico.post(url, headers=HEADER, data=payload,
                              timeout=20).text
     soup = BeautifulSoup(html, 'html.parser')
     for bold in soup.findAll('b'):
         text = bold.get_text().strip()
         if text == '':
             continue
         try:
             prices.append(price_str(text))
         except Exception:
             # Best-effort: unparseable price text is silently skipped.
             continue
     return prices
Exemple #12
0
def main():
    """Log into 10bis with headless Firefox, scrape the remaining credit,
    and optionally push it to a Prometheus push-gateway.

    Side effects: launches a browser, prints/logs the credit amount, and
    pushes a gauge when both gateway env settings are present.
    """
    # Only build a registry when both gateway endpoint and job name are set.
    if PROMETHEUS_GATEWAY_ENDPOINT is not None and PROMETHEUS_JOB_NAME is not None:
        prometheus_registry = CollectorRegistry()
    else:
        prometheus_registry = None

    options = Options()
    options.headless = True
    # chrome_options = Options()
    # chrome_options.add_argument("--headless")

    driver = webdriver.Firefox(options=options)
    # driver = webdriver.Chrome(options=options, chrome_options=chrome_options)
    driver.get("https://www.10bis.co.il/")
    # Open the login form and authenticate.
    driver.find_element_by_css_selector(
        "button.styled__HeaderUserLink-sc-1l9k7ll-4:nth-child(3)").click()
    driver.find_element_by_css_selector("#email").send_keys(USERNAME)
    driver.find_element_by_css_selector("#password").send_keys(PASSWORD)
    driver.find_element_by_css_selector(
        ".styled__LongButton-sc-10wc8na-4").click()
    # Fixed sleeps give the single-page app time to render between steps.
    time.sleep(10)
    driver.find_element_by_css_selector(
        ".styled__ActionMenuButton-sc-1snjgai-0").click()
    time.sleep(10)
    driver.find_element_by_css_selector(
        "div.styled__ActionMenuLinkContainer-sc-1snjgai-14:nth-child(5) > a:nth-child(1)"
    ).click()
    time.sleep(10)
    # NOTE(review): ".klnjjw" looks like a styled-components hash and may
    # break when the site redeploys — confirm periodically.
    credit = driver.find_element_by_css_selector(".klnjjw").text
    credit_amount = price_str(credit)
    logger.info(f"Credit amount {credit_amount}")
    credit_gauge = Gauge(
        "tenbis_credit_total",
        "Current Credit left on 10bis",
        registry=prometheus_registry,
    )
    credit_gauge.set(credit_amount)
    print(credit_amount)
    if prometheus_registry is not None:
        push_to_gateway(
            PROMETHEUS_GATEWAY_ENDPOINT,
            job=PROMETHEUS_JOB_NAME,
            registry=prometheus_registry,
        )
        logger.info("Pushed to gateway")
    driver.close()
def parse_html(source):
    """Parse a KSL search-results page into MLS listing records.

    Args:
        source: raw HTML of the results page.

    Returns:
        list of shared.MLS records (unlike the URE variant, the expected
        count is validated here rather than returned).

    Raises:
        ValueError: when no listings are found, or fewer than the page's
            advertised result count.
    """
    mls_listings = []
    soup = BeautifulSoup(source, 'html.parser')
    results_count_el = soup.select_one(
        shared.PARAMS['selector']['results_count'])
    expected_count = extract_results_count(results_count_el.text)
    count = 0
    for listing_el in soup.select('.Listing'):
        # Only .Listing elements carrying an id are real listings.
        if 'id' not in listing_el.attrs:
            continue
        count += 1
        unique_id = int(listing_el.get('id'))
        address = " ".join(listing_el.select_one('.Address').text.split())
        list_price = int(
            money_parser.price_str(listing_el.select_one('.Price').text))
        status = shared.ACTIVE
        # Bed/bath text matches "<n> beds | <n[.n]> baths" per the regexes;
        # fractional baths are rounded to the nearest whole number.
        bed_bath_str = listing_el.select_one('.BedBath').text
        bedrooms = int(re.match(r'(\d+) beds \|.*', bed_bath_str).group(1))
        bathrooms = int(
            round(
                float(
                    re.match(r'.*\| (\d+\.?\d*) baths',
                             bed_bath_str).group(1))))
        sqft = int(
            re.match(
                r'(\d+) sq\. ft\.',
                listing_el.select_one('.Listing-squareFeet').text).group(1))
        # Agent and open-house data are not scraped from KSL result cards.
        listing_agent = ''
        open_house = ''
        image_url = listing_el.select_one(
            shared.PARAMS['selector']['photo_image_ksl']).attrs['src']
        # ['mls', 'address', 'price', 'status', 'bedrooms', 'bathrooms', 'sqft', 'agent', 'open_house', 'source',
        # 'image_url']
        listing = shared.MLS(unique_id, address, list_price, status, bedrooms,
                             bathrooms, sqft, listing_agent, open_house,
                             shared.SOURCE_KSL, image_url)
        validate_listing(listing)
        mls_listings.append(listing)
    if count == 0:
        raise ValueError('No listings found')
    if count < expected_count:
        raise ValueError(
            f'Results count ({count}) does not equal expected count ({expected_count})'
        )
    return mls_listings
Exemple #14
0
 def parse_item(self, response):
     """Scrape a Lankatronics product page and yield a normalized item.

     A missing price is reported as 0.0 rather than crashing the spider.
     """
     print('\x1b[1;30;43m' + 'RUNNING-ITEM' + '\x1b[0m')
     print('\x1b[1;35;40m' + response.request.url + '\x1b[0m')
     title = response.xpath(
         '//h1[@class="page-title"]/span/text()').extract_first()
     price = price_str(
         str(
             response.xpath(
                 '//span[@class="price"]/text()').extract_first()))
     # Default a missing price. (Replaces the original's redundant
     # "else: price = price" branch and the odd 00.00 literal.)
     if price is None:
         price = 0.0
     # Join the description nodes, normalize unicode, collapse whitespace.
     description = re.sub(
         '\s+', ' ',
         unicodedata.normalize(
             "NFKD", ''.join(
                 response.xpath(
                     '//div[@class="product attribute description"]/div//text()'
                 ).extract()))).strip()
     img = 'None'
     url = response.url
     location = 'online'
     condition = 'New'
     # (Local renamed from the misspelled "avilability".)
     availability = response.xpath(
         '//div[@title="Availability"]/span/text()').extract_first()
     print('\x1b[6;30;42m' + title + '\x1b[0m')
     print('\x1b[6;30;42m' + str(price) + '\x1b[0m')
     print('\x1b[6;30;42m' + str(description) + '\x1b[0m')
     print('\x1b[6;30;42m' + str(img) + '\x1b[0m')
     print('\x1b[6;30;42m' + str(url) + '\x1b[0m')
     print('\x1b[6;30;42m' + str(availability) + '\x1b[0m')
     yield {
         'title': title,
         'price': float(price),
         'description': description,
         'img': img,
         'url': url,
         'location': location,
         'condition': condition,
         'store': 'Lankatronics'
         # 'availability': availability,
     }
Exemple #15
0
def max_min_prices():
    """Print the maximum house price found in each CSV under ../Datasets.

    The Donegal dataset is normalized in place first: its price column is
    run through ``price_str`` to strip currency formatting, then rewritten.
    """
    datasets = os.fsencode("../Datasets")
    for dataset in os.listdir(datasets):
        # Convert the byte filename back to str and prepend the path.
        name = str(dataset, "utf-8")
        ds = "../Datasets/" + name
        if "Donegal" in ds:
            df = pd.read_csv(ds)
            # BUG FIX: assigning into the row yielded by iterrows() never
            # modifies the DataFrame (rows are copies), so the original
            # to_csv wrote the data back unchanged. Transform the column
            # directly instead.
            df['price'] = df['price'].apply(price_str)
            df.to_csv(ds, index=False)
        df = pd.read_csv(ds)
        try:
            # BUG FIX: str.strip('.csv') strips *characters*, mangling names
            # such as "cavan.csv"; splitext drops only the extension.
            county = os.path.splitext(name)[0]
            #  ValueError: could not convert string to float: '\x80220,000.00'
            # TODO: the "min house price" part has no format placeholder.
            print('County {:s}; max house price €{:.0f}m; min house price €k'.
                  format(county, float(str(df['price'].max()))))
        except Exception as error:
            print("Error")
            print(ds)
            print("OS error: {0}".format(error))
Exemple #16
0
    def parse_item(self, response):
        """Scrape a Laabai product page and yield a normalized item dict."""
        print('\x1b[1;30;43m' + 'RUNNING-ITEM' + '\x1b[0m')
        print('\x1b[1;35;40m' + response.request.url + '\x1b[0m')

        title = response.xpath(
            "//h1[@class='product-title entry-title']/text()").extract_first()
        raw_price = response.xpath(
            "//span[@class='woocommerce-Price-amount amount']/text()"
        ).extract_first()
        price = price_str(raw_price)
        # Join the bullet-point texts with spaces, normalize the unicode
        # form, and collapse whitespace runs.
        bullet_text = ' '.join(
            response.xpath(
                "//div[@class='panel entry-content active']/ul/li/text()"
            ).extract())
        description = re.sub(
            '\s+', ' ', unicodedata.normalize("NFKD", bullet_text)).strip()
        img = response.xpath(
            "//img[@class='wp-post-image']/@src").extract_first()
        url = response.url
        location = 'online'
        condition = 'New'
        #availability =

        # Debug dump, same order and escapes as before.
        print('\x1b[6;30;42m' + title + '\x1b[0m')
        for field in (price, description, img, url):
            print('\x1b[6;30;42m' + str(field) + '\x1b[0m')

        yield {
            'title': title,
            'price': float(price),
            'description': description,
            'img': img,
            'url': url,
            'location': location,
            'condition': condition,
            'store': 'Laabai'
        }
def parse_boa(msgli):
    """Build a DataFrame of BoA Travel transactions from Gmail messages.

    Dates come from each message's Date header; the amount and merchant
    are sliced out of the message snippet between fixed keywords.
    """
    dates = []
    descriptions = []
    amounts = []

    for msg in msgli:
        # Transaction date lives in the Date header.
        for header in msg['payload']['headers']:
            if header['name'] == 'Date':
                dates.append(parser.parse(header['value']).date())

        # Amount and description are delimited by keywords in the snippet.
        snippet = msg['snippet']
        amount_index = snippet.find("Amount")
        date_index = snippet.find("Date")
        where_index = snippet.find("Where")
        end_index = snippet.find("View details")
        if end_index == -1:
            # Alternate snippet wording uses a different trailer.
            end_index = snippet.find("This may")

        amounts.append(price_str(snippet[amount_index:date_index]))
        descriptions.append(snippet[where_index + 7:end_index])

    return pd.DataFrame(
        data={
            'Date': dates,
            'Description': descriptions,
            'Amount': amounts,
            'Source': ['BoA Travel'] * len(dates)
        })
Exemple #18
0
    def parse_item(self, response):
        """Build an AdbotItem from a classified-ad detail page and yield it."""
        item = AdbotItem()
        item['title'] = response.xpath('//div[@class="l-main__content"]//h1[@class="v-title"]//b/text()').extract()
        item['body'] = response.xpath(
            '//div[@class="l-main__content"]//div[@class="v-descr_text"]/text()[preceding-sibling::br and following-sibling::br]').extract()

        # Price: numeric value plus whatever trails it (taken as currency).
        item['price'] = {}
        raw_price = response.xpath('//div[@class="l-right hidden-phone"]//div[@class="v-price only"]//b/text()').extract()
        if len(raw_price) == 1:
            compact = raw_price[0].replace(" ", "")
            value = price_str(compact)
            item['price']['value'] = value
            parts = compact.split(value)
            if len(parts) == 2:
                item['price']['currency'] = parts[1]

        # Publication date: the text after the ':' in the second <small>.
        raw_date = response.xpath('//div[@class="l-main__content"]//div[@class="v-info"]//small/text()').extract()
        # TODO: still need to convert the date string to a long timestamp.
        item['date'] = raw_date[1].split(':')[1]

        item['contact'] = {}
        item['contact']['name'] = response.xpath('//div[@class="v-author__info"]//span/text()').extract()
        # TODO: still need to capture the advertiser's phone number.
        # item['contact']['phone'] = response.xpath('//div[@class="v-author__info"]//a//strong/text()').extract()
        item['url'] = response.url

        item['images'] = self.parse_images(response)
        print("::item ", item)

        yield item
Exemple #19
0
 def parse_item(self, response):
     """Scrape an Ikman listing page and yield a normalized item dict."""
     title = response.xpath(
         '//div[@class="item-top col-12 lg-8"]/h1/text()').extract_first()
     location = response.xpath(
         '//div[@class="item-top col-12 lg-8"]/p/span[@class="location"]/text()'
     ).extract_first()
     # Join the description text nodes, normalize the unicode form, and
     # collapse whitespace runs to single spaces.
     raw_description = ''.join(
         response.xpath('//div[@class="item-description"]/*/text()').extract())
     description = re.sub(
         '\s+', ' ', unicodedata.normalize("NFKD", raw_description)).strip()
     price = price_str(
         response.xpath(
             '//div[@class="ui-price-tag"]/span[@class="amount"]/text()').
         extract_first())
     url = response.url
     img = "na"
     condition = response.xpath(
         '//div[@class="item-properties"]/dl/dd/text()').extract_first()
     # Debug dump, same order and escapes as before.
     for field in (title, location, description, price, url, condition):
         print('\x1b[6;30;42m' + str(field) + '\x1b[0m')
     yield {
         'title': title,
         'price': float(price),
         'description': description,
         'img': img,
         'url': url,
         'location': location,
         'store': "Ikman",
         'condition': condition
     }
def parse_venmo(msgli):
    """Build a DataFrame of Venmo transactions from Gmail message dicts.

    Amounts come from the Subject header, dates from the Date header, and
    the snippet is used as the description. Charges you issued and payments
    made to you are negated so the Amount column reflects cash flow.

    Args:
        msgli: list of Gmail API message dicts (with 'payload' headers and
            a 'snippet').

    Returns:
        pandas.DataFrame with Date, Description, Amount and Source columns.
    """
    dateli = []
    descli = []
    amtli = []

    for msg in msgli:
        headerli = msg['payload']['headers']
        for nestdic in headerli:
            # getting date of transactions
            if nestdic['name'] == 'Date':
                date = parser.parse(nestdic['value']).date()
                dateli.append(date)
            # getting amount of transactions (it rides in the subject line)
            if nestdic['name'] == 'Subject':
                amtli.append(price_str(nestdic['value']))

        # getting descriptions
        descli.append(msg['snippet'])

    # Change sign of amount based on whether you're paying or being paid.
    # BUG FIX: iterate by position — the original used descli.index(desc),
    # which returns the FIRST matching index and mis-signs amounts when two
    # messages share the same snippet text.
    for i, desc in enumerate(descli):
        amount = float(amtli[i])
        if ('You charged' in desc) or ('paid You' in desc):
            amtli[i] = amount * -1
        else:
            amtli[i] = amount

    venmo_df = pd.DataFrame(
        data={
            'Date': dateli,
            'Description': descli,
            'Amount': amtli,
            'Source': ['Venmo'] * len(dateli)
        })

    return venmo_df
Exemple #21
0
def searchAndParse(browser):
    """Scrape Excalibur room prices and email when a booked room got cheaper.

    Loads the fixed booking-search URL, reads each room card's current
    (non-struck) price, and compares it against ``settings.bookedRoomList``;
    ``sendEmail`` fires when a price drop is detected.

    Args:
        browser: a Selenium WebDriver instance.
    """
    wait = WebDriverWait(browser, 10)

    # NOTE(review): guest count and arrive/depart dates are hard-coded in
    # the URL — confirm they should not be parameters.
    browser.get(
        "https://excalibur.mgmresorts.com/en/booking/room-booking.html#/rooms?po=0&numGuests=2&arrive=2019-05-15&depart=2019-05-21"
    )
    wait.until(
        EC.presence_of_element_located((By.CLASS_NAME, "room-list-wrapper")))

    html = browser.page_source
    soup = BeautifulSoup(html, features="html.parser")
    print("=================")
    print("Currently Available Rooms:")

    for roomContainer in soup.select("div.room-list-wrapper"):
        # Get the room's full name
        title = roomContainer.select("h3.room-title")[0].text

        # Find and delete the striked out prices (old price vs sale price)
        strikedPrices = roomContainer.select("span.room-offer-price.strike")
        for match in strikedPrices:
            match.decompose()

        # Get the remaining price (without a strike through it)
        price = price_str(
            roomContainer.select("span.room-offer-price")[0].text)

        print(title, price)

        # Check if room title matches any of the booked room titles
        for bookedRoom in settings.bookedRoomList:
            if bookedRoom['title'] == title:
                # Found a match! Let's compare prices and send email if it's cheaper, otherwise do nothing
                priceDifference = float(bookedRoom['price']) - float(price)
                if priceDifference > 0:
                    sendEmail(title, price, priceDifference)
Exemple #22
0

def send_slack_msg(text, actually_send=True):
    """Post *text* to the Slack webhook at module-level ``post_addr``.

    Args:
        text: message body.
        actually_send: when False, print the message locally instead of
            posting (dry-run mode).
    """
    if actually_send:
        # (The original bound the response to an unused local ``r``.)
        requests.post(url=post_addr,
                      data=json.dumps({
                          'text': text,
                      }).encode('utf-8'),
                      headers={'Content-Type': 'application/json'})
    else:
        print(text)

    # send_slack_msg(f'hey looks like I\'m starting up the app again, sorry')


current_backers = price_str(init_backers)
current_total = price_str(init_total)

while True:

    try:

        new_backers = price_str(get_ks_backers())
        new_total = price_str(get_ks_total())

        print(price_str(new_total))
        print('{0:.1%}'.format(float(new_total) / 70000.))
        print(datetime.datetime.now())

        if (new_backers != current_backers or new_total != current_total):
            backer_diff = int(new_backers) - int(current_backers)
		acc = re.findall(r"\bXX+[0-9]{3,}|\bX+[0-9]{3,}|\bxx+[0-9]{2,}|a/c +[0-9]{2,}|A/c ending +[0-9]{3,}|a/cx+[0-9]{2,}$",sms[i])
		if acc:
			print(acc)
			acc = re.findall('[0-9]*', acc[0]) #Extracting only numbers from account found in  acc also extracting only acc[0]
			
			#for j in range(len(acc)):
				#if(acc!=''):
			print("acc = "+str(acc))       # we found account no. 
			 
			actual_mapping_arr.append(acc)  					
			if(acc not in unique_arr):      #for new account
				unique_arr.append(acc)
				
				matchObj = re.match( r'(.*?)?(\s*)(inr|INR|Rs|rs|RS|inr.|INR.|Rs.|rs.|RS.|(amount(\s*)of)(\s*))(\s*)( *[0-9,]+.?(\s*)[0-9]*)(\s*)(.*)?',sms[i], re.M|re.I) # regex to find digits after inr|inr |rs.|rs. in the string
				if matchObj:         
					print ("matchObj.group(1) : for debit ", int(float(price_str(matchObj.group(8)))))
					debit_msg_inside=debit_msg_inside+1
					#	if sms_flag=0
					file1.write(sms[i]+"\, D\n")		#1 for debit
					excel_count=excel_count+1
					sheet1.write(excel_count,0,excel_count)
					#sheet1.write(excel_count,1,time[i])
					#sheet1.write(excel_count,2,header[i])
					sheet1.write(excel_count,3,acc)
					sheet1.write(excel_count,4,int(float(price_str(matchObj.group(8)))))
					sheet1.write(excel_count,6,sms[i])
					flag =1
				else:
					if(flag != 0):
						print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~1A")
						print(sms[i])
Exemple #24
0
def test_price_str_dec_point():
    """price_str normalizes a comma decimal mark, honoring dec_point."""
    assert price_str('9,99') == '9.99'
    assert price_str('9,99', dec_point='|') == '9|99'
Exemple #25
0
        if key_word_filter in tokens:
            print("The word " + key_word_filter + " was found")

        cprint(job_time_published_date, 'yellow')
        cprint(job_time_published_time, 'yellow')

        # telegram notification
        # conditions

        df = pd.read_csv("upwork_base.csv")

        if title_message not in job_buffer:
            job_buffer.append(title_message)

            if len(money) == 2:
                money_digit = float(price_str(money[1]))
                row_1 = {
                    'date': job_time_published_date,
                    'time': job_time_published_time,
                    'title': title_message,
                    'message': cleaned_string,
                    'fix_price': np.nan,
                    'price_min': money[0],
                    'price_max': money[1]
                }
                df = df.append(row_1, ignore_index=True)
                df.to_csv("upwork_base.csv", index=False)

                print(money_digit)
                if money_digit >= lower_hour_price_filter:
                    bot_message = (
Exemple #26
0
def test_price_str_error(wrong_raw_price):
    """price_str raises ValueError for unparseable input."""
    pytest.raises(ValueError, price_str, wrong_raw_price)
Exemple #27
0
def test_price_str_default(wrong_raw_price):
    """An unparseable price falls back to the supplied default object."""
    sentinel = object()
    assert price_str(wrong_raw_price, default=sentinel) == sentinel
Exemple #28
0
def test_price_str_value(raw_price, price):
    """price_str returns the expected normalized string for each fixture."""
    result = price_str(raw_price)
    assert result == price
Exemple #29
0
 def parse_money(self, amount_str):
     """Convert a money string like "$1,234.56" to a float.

     Non-string values are returned unchanged.
     """
     # isinstance is the idiomatic type check (and handles str subclasses).
     if isinstance(amount_str, str):
         amount_str = float(price_str(amount_str.replace('$', '')))
     return amount_str
def extract_price_from_string(price):
    """Delegate to price_str to pull a normalized price out of *price*."""
    return price_str(price)