Esempio n. 1
0
    def test_exception_null(self, table_text, expected):
        """Check that loading ``table_text`` raises ``expected``.

        Both the ``load()`` call and the consumption of everything it
        yields happen inside the ``pytest.raises`` block, so the error is
        caught whether it surfaces on the call or during iteration.
        """
        text_loader = ptr.MarkdownTableTextLoader(table_text)
        text_loader.table_name = "dummy"

        with pytest.raises(expected):
            # The yielded tables are irrelevant here; they are only
            # consumed to drive the loader to completion.
            list(text_loader.load())
Esempio n. 2
0
    def test_exception_MarkdownTableTextLoader(self, monkeypatch, value,
                                               source, expected):
        """Check that building the table name raises ``expected``.

        ``MarkdownTableFormatter.table_id`` is patched with
        ``self.valid_tag_property`` for the duration of the test, and the
        loader under test is given ``value`` as its table name.
        """
        monkeypatch.setattr(
            MarkdownTableFormatter, "table_id", self.valid_tag_property)

        text_loader = ptr.MarkdownTableTextLoader(source)
        text_loader.table_name = value

        table_formatter = MarkdownTableFormatter(source)
        table_formatter.accept(text_loader)

        with pytest.raises(expected):
            # The name is printed only if no exception was raised, which
            # leaves a trace of the unexpected value in the test output.
            made_name = table_formatter._make_table_name()
            print(made_name)
Esempio n. 3
0
    def test_normal_MarkdownTableTextLoader_null_tag(self, monkeypatch, value,
                                                     expected):
        """Check the generated table name when ``table_id`` is patched out.

        ``MarkdownTableFormatter.table_id`` is replaced with
        ``self.null_tag_property``; the loader's table name is set to
        ``value`` and the formatter must produce ``expected``.
        """
        monkeypatch.setattr(
            MarkdownTableFormatter, "table_id", self.null_tag_property)

        markup = "<table></table>"

        text_loader = ptr.MarkdownTableTextLoader(markup)
        text_loader.table_name = value

        table_formatter = MarkdownTableFormatter(markup)
        table_formatter.accept(text_loader)

        actual_name = table_formatter._make_table_name()
        assert actual_name == expected
Esempio n. 4
0
    def test_normal(self, test_id, table_text, table_name,
                    expected_tabletuple_list):
        """Check that every table loaded from ``table_text`` is expected.

        Each loaded table must be accepted by ``in_tabledata_list`` against
        ``expected_tabletuple_list``, and at least one table must be
        produced. The actual and expected data are printed for every table
        to make a failing case easy to diagnose.
        """
        text_loader = ptr.MarkdownTableTextLoader(table_text)
        text_loader.table_name = table_name

        loaded_count = 0
        loaded_tables = enumerate(text_loader.load(), start=1)
        for loaded_count, actual_table in loaded_tables:
            print("--- id: {} ---".format(test_id))
            print("[actual]\n{}".format(actual_table))
            print("[expected]")
            for expected_table in expected_tabletuple_list:
                print("    {}".format(expected_table))
            print("")
            assert actual_table.in_tabledata_list(expected_tabletuple_list)

        # A loader that yields nothing would otherwise pass vacuously.
        assert loaded_count > 0
Esempio n. 5
0
                            '')  # remove all url for better price matching
                        if len(post.timestamps
                               ) == 0:  # If we dont have any imgur or ibb link
                            post.urls.append(
                                url)  # append all links in the most

                # FIND PRICES.
                # The pattern is built from four adjacent raw-string pieces and
                # compiled case-insensitively:
                #   1. an optional leading keyword (or "$"), followed by a
                #      negative lookbehind on "<digit>x";
                #   2. the number itself;
                #   3. a negative lookahead on "+ bronze", "+ gold", etc.;
                #   4. any run of trailing keywords.
                price_re = re.compile(
                    r'(bought for |sold for |asking( for)? |selling for |shipped |for |\$(\s)?)?(?<!\dx)'  # Optional leading keyword; reject a number directly preceded by "<digit>x" (the "nxn" form used for RAM).
                    r'\d{1,4}(\.\d{0,2})?\$?'  # 1-4 digits, optional decimal part of up to two digits, optional dollar sign after the number.
                    r'(?!\+ bronze|\+ gold|\+ silver|\+ certified|\+ platinum)'  # Don't match "80+ bronze"-style ratings.
                    r'( \$| shipped| local| plus|(\s)?\+|(\s)?obo| or| sold| for|(\s)?USD)*',  # Zero or more trailing keywords after the number.
                    re.IGNORECASE)

                if '|' in post.body:  # | means we found a table
                    loader = ptr.MarkdownTableTextLoader(text=post.body)
                    dfs = []
                    for table_data in loader.load():
                        df = table_data.as_dataframe()
                        dfs.append(df)

                        for column in range(
                                len(df.columns
                                    )):  # Find what column prices are in.
                            prices = price_re.finditer(str(df.iloc[0, column]))
                            try:
                                for price in prices:
                                    price_string = price.group(0)
                                    identified_price = identifyprice(
                                        price_string)
                                    if identified_price != None:
Esempio n. 6
0
                    # Fill in the post record from the submission and add it
                    # to the running list; the price starts out empty.
                    instance.title = listing_title
                    instance.url = submission.url.strip()
                    instance.body = submission.selftext
                    instance.price = ''
                    list_of_posts.append(instance)
                    # Echo the title of the post that was just appended.
                    print(list_of_posts[-1].title)

                    #print(instance.title)
                    #print(instance.url)
                    #print(instance.body) #THIS IS THE TEXT OF THE POST!
                    #print(instance.price)
                    # A '|' in the body is taken as a sign that the post
                    # contains a Markdown table.
                    if '|' in instance.body:
                        print('Table:')
                        #print(listing_text)
                        loader = ptr.MarkdownTableTextLoader(
                            text=submission.selftext)
                        # Writer is created for the "rst" format name.
                        writer = ptw.TableWriterFactory.create_from_format_name(
                            "rst")

                        # Feed every table the loader yields to the writer and
                        # emit it (presumably to stdout -- confirm against the
                        # writer's configuration).
                        for table_data in loader.load():
                            writer.from_tabledata(table_data)
                            writer.write_table()
                        # print(table_data.as_dataframe())
                    else:
                        # No table marker: set up counters for working out
                        # items and prices from the free text instead.
                        final_item_count = 0
                        item_count = 0  # How many items we think the listing contains
                        price_count = 0  # How many prices we think the listing contains
                        splitter_found = False
                        # A comma in the title is treated as an item separator.
                        if ',' in instance.title:
                            splitter_found = True
                            items = instance.title.split(',')