def test_exception_null(self, table_text, expected):
    """Loading ``table_text`` must raise ``expected``.

    A ``MarkdownTableTextLoader`` is built from ``table_text``, given the
    placeholder table name ``"dummy"``, and everything ``load()`` yields is
    iterated inside ``pytest.raises``.
    """
    md_loader = ptr.MarkdownTableTextLoader(table_text)
    md_loader.table_name = "dummy"

    with pytest.raises(expected):
        # Walk every item so an error raised at any point of the iteration
        # is caught by the surrounding context manager.
        for _ in md_loader.load():
            continue
def test_exception_MarkdownTableTextLoader(self, monkeypatch, value, source, expected):
    """Building the table name must raise ``expected`` for table name ``value``.

    ``MarkdownTableFormatter.table_id`` is replaced with
    ``self.valid_tag_property`` for the duration of the test. A loader built
    from ``source`` gets ``value`` as its table name and is handed to a
    formatter built from the same ``source``; the exception is expected from
    ``_make_table_name()``.
    """
    monkeypatch.setattr(
        MarkdownTableFormatter, "table_id", self.valid_tag_property
    )

    text_loader = ptr.MarkdownTableTextLoader(source)
    text_loader.table_name = value

    table_formatter = MarkdownTableFormatter(source)
    table_formatter.accept(text_loader)

    with pytest.raises(expected):
        # If no exception is raised the name is printed, and pytest.raises
        # then fails the test.
        made_name = table_formatter._make_table_name()
        print(made_name)
def test_normal_MarkdownTableTextLoader_null_tag(self, monkeypatch, value, expected):
    """The generated table name must equal ``expected`` for table name ``value``.

    ``MarkdownTableFormatter.table_id`` is replaced with
    ``self.null_tag_property`` for the duration of the test. The loader and
    the formatter are both built from a fixed, empty ``<table>`` snippet.
    """
    monkeypatch.setattr(
        MarkdownTableFormatter, "table_id", self.null_tag_property
    )

    source = "<table></table>"

    text_loader = ptr.MarkdownTableTextLoader(source)
    text_loader.table_name = value

    table_formatter = MarkdownTableFormatter(source)
    table_formatter.accept(text_loader)

    actual_name = table_formatter._make_table_name()
    assert actual_name == expected
def test_normal(self, test_id, table_text, table_name, expected_tabletuple_list):
    """Every table loaded from ``table_text`` must be among the expected ones.

    For each table yielded by ``load()`` the test prints the test id, the
    actual table and all expected tables, then asserts that the actual table
    is contained in ``expected_tabletuple_list``. At least one table must
    have been yielded for the test to pass.
    """
    md_loader = ptr.MarkdownTableTextLoader(table_text)
    md_loader.table_name = table_name

    loaded_any = False

    for actual_table in md_loader.load():
        # Diagnostic output, shown by pytest when the test fails.
        header = "--- id: {} ---".format(test_id)
        print(header)
        actual_dump = "[actual]\n{}".format(actual_table)
        print(actual_dump)
        print("[expected]")
        for expected_table in expected_tabletuple_list:
            print(" {}".format(expected_table))
        print("")

        is_expected = actual_table.in_tabledata_list(expected_tabletuple_list)
        assert is_expected

        loaded_any = True

    # Guard against a loader that yields nothing, which would otherwise
    # pass vacuously.
    assert loaded_any
'') # remove all url for better price matching if len(post.timestamps ) == 0: # If we dont have any imgur or ibb link post.urls.append( url) # append all links in the most # FIND PRICES. price_re = re.compile( r'(bought for |sold for |asking( for)? |selling for |shipped |for |\$(\s)?)?(?<!\dx)' #search for keywords, but not nxn (RAM) r'\d{1,4}(\.\d{0,2})?\$?' #search for numbers and decimal places, and dollar sign after the number. r'(?!\+ bronze|\+ gold|\+ silver|\+ certified|\+ platinum)' #don't match 80+ ratings. r'( \$| shipped| local| plus|(\s)?\+|(\s)?obo| or| sold| for|(\s)?USD)*', #match these keywords re.IGNORECASE) if '|' in post.body: # | means we found a table loader = ptr.MarkdownTableTextLoader(text=post.body) dfs = [] for table_data in loader.load(): df = table_data.as_dataframe() dfs.append(df) for column in range( len(df.columns )): # Find what column prices are in. prices = price_re.finditer(str(df.iloc[0, column])) try: for price in prices: price_string = price.group(0) identified_price = identifyprice( price_string) if identified_price != None:
instance.title = listing_title instance.url = submission.url.strip() instance.body = submission.selftext instance.price = '' list_of_posts.append(instance) print(list_of_posts[-1].title) #print(instance.title) #print(instance.url) #print(instance.body) #THIS IS THE TEXT OF THE POST! #print(instance.price) if '|' in instance.body: print('Table:') #print(listing_text) loader = ptr.MarkdownTableTextLoader( text=submission.selftext) writer = ptw.TableWriterFactory.create_from_format_name( "rst") for table_data in loader.load(): writer.from_tabledata(table_data) writer.write_table() # print(table_data.as_dataframe()) else: final_item_count = 0 item_count = 0 # How many item do we think there is in the listing price_count = 0 # How many prices we think there is in the listing splitter_found = False if ',' in instance.title: splitter_found = True items = instance.title.split(',')