def from_json_to_db(self):
    """Rebuild the pydblite database from the scraped-threads JSON dump.

    Reads scnscraper/threads.json line by line, accumulates each
    "{ ... }" record, parses its fields with a regex, and inserts them
    into scnscraper/abap.pydb (committing after each insert).

    Fixes: the input path was misspelled 'scnsraper/threads.json'
    (every other path in this module uses 'scnscraper/'); the unused
    local `i` was removed.
    """
    thread = ''
    db = Base("scnscraper/abap.pydb", save_to_file=True)
    # Create a new base with field names, discarding any existing data.
    db.create('url', 'uid', 'type', 'author', 'title', 'date_time',
              'tags', 'views', 'answers', 'resolve', 'upvotes', 'text',
              mode='override')
    with open('scnscraper/threads.json', 'r') as file:
        for line in file:
            if line.endswith(" }\n"):
                # Closing line of a record: parse the accumulated text.
                thread += line
                tokens = re.search(
                    r"url:\s'(.*?)',\suid:\s'(.*?)',\stype:\s'(.*?)',\sauthor:\s'(.*?)',\stitle:\s'(.*?)',\sdate_time:\s'(.*?)',\stags:\s'(.*?)',\sviews:\s'(.*?)',\sanswers:\s'(.*?)',\sresolve:\s'(.*?)',\supvotes:\s'(.*?)', text:\s'((.|\n)*)'\s}",
                    str(thread))
                if tokens is not None:
                    db.insert(url=tokens.group(1), uid=tokens.group(2),
                              type=tokens.group(3), author=tokens.group(4),
                              title=tokens.group(5), date_time=tokens.group(6),
                              tags=tokens.group(7), views=tokens.group(8),
                              answers=tokens.group(9), resolve=tokens.group(10),
                              upvotes=tokens.group(11), text=tokens.group(12))
                    db.commit()
                    print('\n--------------------------------------------\n')
                thread = ''
            if line.startswith(" ]"):
                # Page separator produced by the scraper.
                print("new page")
                thread = ''
            if (line.endswith('\n') and not line.startswith(" ]\n\n")
                    and not line.endswith(" }\n")):
                # Middle of a record: keep accumulating.
                thread += line
def pydblite():
    """Walk through the pydblite pure-Python engine: create, insert,
    select, delete, index, update, and schema changes."""
    from pydblite.pydblite import Base

    base = Base('dummy', save_to_file=False)
    # Declare the schema: field names only, no types.
    base.create('name', 'age', 'size')
    # Records are dictionaries with a unique integer key __id__.
    base.insert(name='homer', age=23, size=1.84)
    # Simple selection by field value.
    matches = base(name="homer")
    # Complex selection via list comprehension.
    adults = [rec for rec in base if 30 > rec['age'] >= 18 and rec['size'] < 2]
    print("res:", adults)
    # Delete a single record...
    base.delete(matches[0])
    # ...or a whole list of records.
    batch = []
    rid = base.insert(name='homer', age=23, size=1.84)
    batch.append(base[rid])
    rid = base.insert(name='marge', age=36, size=1.94)
    batch.append(base[rid])
    # Selection with a generator expression works too.
    for rec in (rec for rec in base if rec['name'] in ('homer', 'marge')):
        pass
    base.delete(batch)
    rid = base.insert(name='Bart', age=15, size=1.34)
    record = base[rid]  # the record such that record['__id__'] == rid
    # Delete a record by its id.
    del base[rid]
    # Create an index on a field.
    base.create_index('age')
    # Update a record fetched by direct id access.
    rid = base.insert(name='Lisa', age=13, size=1.24)
    record = base[rid]
    base.update(record, age=24)
    # Add and drop fields.
    base.add_field('new_field', default=0)
    base.drop_field('name')
    # Save changes on disk.
    base.commit()
def pydblite():
    """Demo of the pydblite in-memory engine (create/insert/select/update).

    Fixed: `print "res:", res` was Python 2 print-statement syntax and is
    a SyntaxError under Python 3; replaced with the print() function to
    match the rest of the file.
    """
    from pydblite.pydblite import Base
    db = Base('dummy', save_to_file=False)
    # create new base with field names
    db.create('name', 'age', 'size')
    # insert new record
    db.insert(name='homer', age=23, size=1.84)
    # records are dictionaries with a unique integer key __id__
    # simple selection by field value
    records = db(name="homer")
    # complex selection by list comprehension
    res = [r for r in db if 30 > r['age'] >= 18 and r['size'] < 2]
    print("res:", res)  # BUG FIX: was a Python 2 print statement
    # delete a record or a list of records
    r = records[0]
    db.delete(r)
    list_of_records = []
    r = db.insert(name='homer', age=23, size=1.84)
    list_of_records.append(db[r])
    r = db.insert(name='marge', age=36, size=1.94)
    list_of_records.append(db[r])
    # or generator expression
    for r in (r for r in db if r['name'] in ('homer', 'marge')):
        pass
    db.delete(list_of_records)
    rec_id = db.insert(name='Bart', age=15, size=1.34)
    record = db[rec_id]  # the record such that record['__id__'] == rec_id
    # delete a record by its id
    del db[rec_id]
    # create an index on a field
    db.create_index('age')
    # update via direct access by id
    rec_id = db.insert(name='Lisa', age=13, size=1.24)
    record = db[rec_id]
    db.update(record, age=24)
    # add and drop fields
    db.add_field('new_field', default=0)
    db.drop_field('name')
    # save changes on disk
    db.commit()
def from_json_to_db(self):
    """Convert the scraped-threads JSON dump into a fresh pydblite base.

    Each "{ ... }" record in scnscraper/threads.json is accumulated,
    parsed with a regex, and inserted into scnscraper/abap.pydb; the
    base is committed after every successful insert.

    Fixes: the input path was misspelled 'scnsraper/threads.json'
    (inconsistent with every other 'scnscraper/' path here); the unused
    counter `i` was removed.
    """
    thread = ''
    db = Base("scnscraper/abap.pydb", save_to_file=True)
    # Create a new base with field names, discarding any existing data.
    db.create('url', 'uid', 'type', 'author', 'title', 'date_time',
              'tags', 'views', 'answers', 'resolve', 'upvotes', 'text',
              mode='override')
    with open('scnscraper/threads.json', 'r') as file:
        for line in file:
            if line.endswith(" }\n"):
                # Closing line of a record: parse the accumulated text.
                thread += line
                tokens = re.search(
                    r"url:\s'(.*?)',\suid:\s'(.*?)',\stype:\s'(.*?)',\sauthor:\s'(.*?)',\stitle:\s'(.*?)',\sdate_time:\s'(.*?)',\stags:\s'(.*?)',\sviews:\s'(.*?)',\sanswers:\s'(.*?)',\sresolve:\s'(.*?)',\supvotes:\s'(.*?)', text:\s'((.|\n)*)'\s}",
                    str(thread))
                if tokens is not None:
                    db.insert(url=tokens.group(1), uid=tokens.group(2),
                              type=tokens.group(3), author=tokens.group(4),
                              title=tokens.group(5), date_time=tokens.group(6),
                              tags=tokens.group(7), views=tokens.group(8),
                              answers=tokens.group(9), resolve=tokens.group(10),
                              upvotes=tokens.group(11), text=tokens.group(12))
                    db.commit()
                    print('\n--------------------------------------------\n')
                thread = ''
            if line.startswith(" ]"):
                # Page separator produced by the scraper.
                print("new page")
                thread = ''
            if (line.endswith('\n') and not line.startswith(" ]\n\n")
                    and not line.endswith(" }\n")):
                # Middle of a record: keep accumulating.
                thread += line
def test_open_existing(self):
    """Re-opening a saved base keeps the stored column names;
    mode="override" wipes it; missing/invalid modes raise."""
    base = Base(test_db_name, save_to_file=True)
    base.create('unique_id', 'name', "active", mode="open")
    base.insert("123", "N", True)
    base.commit()

    # Open the existing db again: the new column names passed to
    # create() are ignored, so the fields must equal the old ones.
    base = Base(test_db_name, save_to_file=True)
    base.create('unique_id2', 'name2', "active2", mode="open")
    rec = base.insert("123", "N", True)
    base.commit()
    self.assertEqual(base.fields, ['unique_id', 'name', "active"])

    # mode="override" overwrites the existing db.
    base = Base(test_db_name, save_to_file=True)
    base.create('unique_id', 'name', "active", mode="override")
    base.commit()
    self.assertEqual(len(self.filter_db), 0)

    # Omitting mode (equivalent to mode=None) on an existing file raises
    # IOError; an unknown mode string raises ValueError.
    self.assertRaises(IOError, base.create, 'unique_id', 'name', "active")
    self.assertRaises(ValueError, base.create, 'unique_id', 'name', "active",
                      mode="invalidmode")
class InMemoryDBLite(InMemoryDB):
    """In-memory database implementation backed by the pydblite engine.

    Implements all steps from Dextra's programming challenge using the
    pydblite in-memory engine (save_to_file=False, so nothing touches
    disk).
    """

    def __init__(self, name: str):
        """Create the named in-memory base; starts disconnected."""
        logger.debug('Initializing DB.')
        # connect()/disconnect() only toggle this flag; every other
        # operation refuses to run while it is False.
        self.connected = False
        self.name = name
        self.db = Base(name, save_to_file=False)

    def connect(self):
        """Mark the database as connected."""
        logger.debug(f'Connecting to [{self.name}].')
        # When using pydblite in-memory engine, it is unnecessary to
        # really connect to a db, so we just set the flag to true.
        self.connected = True

    def disconnect(self):
        """Mark the database as disconnected; raises if not connected."""
        logger.debug(f'Disconnecting from [{self.name}].')
        if not self.connected:
            raise Exception('Not connected to db.')
        else:
            # When using pydblite in-memory engine, it is unnecessary to
            # really disconnect from a db, so we just set the flag to false.
            self.connected = False

    def create_schema(self, *args):
        """Create the schema from positional field names.

        mode='override' discards any previously created fields.
        Raises if not connected; returns pydblite's create() result.
        """
        logger.debug(f'Crating schema into [{self.name}].')
        if not self.connected:
            raise Exception('Not connected to db.')
        else:
            r = self.db.create(*args, mode='override')
            self.db.commit()
            return r

    def insert(self, item: dict):
        """Insert one record (field -> value dict) and commit.

        Raises if not connected; returns pydblite's insert() result.
        """
        logger.debug(f'Inserting item into [{self.name}].')
        if not self.connected:
            raise Exception('Not connected to db.')
        else:
            r = self.db.insert(**item)
            self.db.commit()
            return r

    def insert_multiple(self, items: list):
        """Insert a list of record dicts; returns the last insert result.

        NOTE(review): commits after each insert, mirroring insert();
        confirm a single commit after the batch was not intended.
        """
        logger.debug(f'Inserting multiple items into [{self.name}].')
        if not self.connected:
            raise Exception('Not connected to db.')
        else:
            for item in items:
                r = self.db.insert(**item)
                self.db.commit()
            return r
class PyDbLiteTestCase(Generic, unittest.TestCase):
    """Tests for the pydblite pure-Python backend: open/create modes,
    persistence to file, and list/sqlite-compat insert semantics."""

    def setUp(self):  # NOQA
        # Fresh in-memory base for each test; file-backed tests create
        # their own Base instances on test_db_name.
        self.first_record_id = 0
        filter_db = Base(test_db_name, save_to_file=False)
        filter_db.create('unique_id', 'name', "active", mode="override")
        self.filter_db = filter_db

    def tearDown(self):  # NOQA
        # Remove whatever the test left behind at test_db_name —
        # a db file, or a directory (test_open_file_with_existing_dir).
        if os.path.isfile(test_db_name):
            os.remove(test_db_name)
        elif os.path.isdir(test_db_name):
            os.rmdir(test_db_name)

    def setup_db_for_filter(self):
        """Populate filter_db with the 7 status fixture rows."""
        self.reset_status_values_for_filter()
        for d in self.status:
            res = self.filter_db.insert(**d)
        # Last record id is 6, i.e. 7 rows were inserted.
        self.assertEqual(res, 6)

    def test_open(self):
        """A brand-new in-memory base accepts create + insert."""
        db = Base('dummy', save_to_file=False)
        db.create('name', 'age', 'size')
        db.insert(name='homer', age=23, size=1.84)

    def test_open_file_with_existing_dir(self):
        """create(mode="open") must fail when the path is a directory."""
        os.mkdir(test_db_name)
        db = Base(test_db_name, save_to_file=True)
        # A dir with that name exists
        self.assertRaises(IOError, db.create, 'unique_id', 'name', "active",
                          mode="open")

    def test_open_existing(self):
        """Opening an existing db keeps its stored column names;
        override wipes it; missing/invalid modes raise."""
        db = Base(test_db_name, save_to_file=True)
        db.create('unique_id', 'name', "active", mode="open")
        db.insert("123", "N", True)
        db.commit()
        # Just verify that it works to open an existing db.
        # The column names are ignored, therefore they should
        # equal the old column names
        db = Base(test_db_name, save_to_file=True)
        db.create('unique_id2', 'name2', "active2", mode="open")
        rec = db.insert("123", "N", True)
        db.commit()
        self.assertEqual(db.fields, ['unique_id', 'name', "active"])
        # mode="override" will overwrite existing db
        db = Base(test_db_name, save_to_file=True)
        db.create('unique_id', 'name', "active", mode="override")
        db.commit()
        self.assertEqual(len(self.filter_db), 0)
        # Omitting mode equals passing mode=None -> IOError on existing file
        self.assertRaises(IOError, db.create, 'unique_id', 'name', "active")
        self.assertRaises(ValueError, db.create, 'unique_id', 'name', "active",
                          mode="invalidmode")

    def test_open_memory(self):
        """The ':memory:' name implies save_to_file=False."""
        db = Base(":memory:")
        self.assertFalse(db.save_to_file)

    def test_open_memory_with_existing_filename(self):
        """open() on an existing file loads its fields even with
        save_to_file=False; override replaces them."""
        self.filter_db = Base(test_db_name, save_to_file=True)
        self.filter_db.create('unique_id', 'name', "active", mode="override")
        self.filter_db.commit()
        db = Base(test_db_name, save_to_file=False)
        db.open()
        self.assertEqual(db.fields, ['unique_id', 'name', "active"])
        db = Base(test_db_name, save_to_file=False)
        db.create('unique_id2', 'name2', "active2", mode="override")
        self.assertEqual(db.fields, ['unique_id2', 'name2', "active2"])

    def test_insert_list(self):
        """Without sqlite_compat, a positional tuple is stored whole in
        the first field."""
        status = (8, "testname", 0)
        # Insert one record passed as a single positional value
        rec = self.filter_db.insert(status)
        self.assertEqual(rec, 0)
        # The whole tuple ends up in 'unique_id'.
        self.assertEqual(self.filter_db[rec]["unique_id"], status)

    def test_sqlite_compat_insert_list(self):
        """With sqlite_compat, a list of tuples is unpacked field-wise."""
        self.filter_db = Base(test_db_name,
                              save_to_file=False,
                              sqlite_compat=True)
        self.filter_db.create('unique_id', 'name', "active", mode="override")
        status = [(8, "testname", 0)]
        # Insert 1 entry; compat mode returns None instead of an id
        rec = self.filter_db.insert(status)
        self.assertEqual(rec, None)
        self.assertEqual(len(self.filter_db), 1)
        self.assertEqual(self.filter_db[0]["unique_id"], 8)
        self.assertEqual(self.filter_db[0]["name"], "testname")
        self.assertEqual(self.filter_db[0]["active"], 0)

    def test_sqlite_compat(self):
        """Compat mode bulk-inserts the fixture rows and returns None."""
        db = Base(test_db_name,
                  save_to_file=False, sqlite_compat=True)
        db.create('unique_id', 'name', "active", mode="open")
        self.reset_status_values_for_filter()
        # Insert 7 entries
        res = db.insert(self.status)
        self.assertEqual(res, None)
        self.assertEqual(len(db), 7)
        status = [(8, "testname", 0)]
        res = db.insert(status)
        self.assertEqual(res, None)
        self.assertEqual(len(db), 8)
mon_dd_yyyy=mon_dd_yyyy, month_dd_yyyy=month_dd_yyyy, dd_mm_yyyy=dd_mm_yyyy, mm_dd_yyyy=mm_dd_yyyy, mm_dd_yy=mm_dd_yy, dd_mm_yy=dd_mm_yy, m_d_yy=m_d_yy, d_m_yy=d_m_yy, weekday_flag=weekday_flag, week_first_day_flag=week_first_day_flag, week_last_day_flag=week_last_day_flag, month_first_day_flag=month_first_day_flag, month_last_day_flag=month_last_day_flag, quarter_first_day_flag=quarter_first_day_flag, quarter_last_day_flag=quarter_last_day_flag, year_first_day_flag=year_first_day_flag, year_last_day_flag=year_last_day_flag, leap_year_flag=leap_year_flag, is_holiday=is_holiday, holiday_name=holiday_name, nth_weekday=nth_weekday) # save data. date_table.commit() # write data as CSV with open('temporal_data.csv', 'w', newline='') as myfile: wr = csv.writer(myfile, quoting=csv.QUOTE_ALL) # write header wr.writerow(list(date_table[0].keys())) # write value for item in date_table: wr.writerow(list(item.values()))
class DataStoring():
    """Persists scraped SCN threads to a JSON dump file and a pydblite base.

    All paths are relative ('scnscraper/...'), so the working directory
    matters. Records are SapItem-like mappings produced by the scraper.
    """

    # Initialize an instantiated object by touching the json file and
    # opening (or creating) the database.
    def __init__(self):
        # Opening in append mode creates the file if missing; it is
        # closed immediately — only the side effect is wanted here.
        self.out_file = open("scnscraper/abap.json", "a")
        self.out_file.close()
        self.db = Base("scnscraper/abap.pydb")
        if self.db.exists():
            self.db.open()
        else:
            self.db.create('url', 'uid', 'type', 'author', 'title',
                           'date_time', 'tags', 'views', 'answers',
                           'resolve', 'upvotes', 'text')

    # For each thread scraped, insert it into the db.
    def insert_items_into_db(self, threads):
        for thread in threads:
            item = SapItem()  # New Item instance
            # NOTE(review): immediately rebound to the thread — the
            # SapItem() above is discarded.
            item = thread
            try:
                # Insert into db (all values coerced to str).
                self.db.insert(url=str(item["url"]), uid=str(item["uid"]),
                               type=str(item["type"]),
                               author=str(item["author"]),
                               title=str(item["title"]),
                               date_time=str(item["date_time"]),
                               tags=str(item["tags"]),
                               views=str(item["views"]),
                               answers=str(item["answers"]),
                               resolve=str(item["resolve"]),
                               upvotes=str(item["upvotes"]),
                               text=str(item["text"]))
            except UnicodeEncodeError:
                print("Unicode Encode Exception!")
        # Save changes on disk.
        self.db.commit()

    # For each thread scraped, build the string to append to the json file.
    # Returns "" when there are no threads; otherwise "[ " + one
    # "{ key: 'value', ... }" line per thread + " ]\n\n".
    def threads_to_str(self, threads):
        out_string = "[ "
        if threads.__len__() == 0:
            return ""
        for thread in threads:
            item = SapItem()
            item = thread
            try:
                out_string += "{ url: '" + str(item["url"]) + "', " + "uid: '" + str(item["uid"]) + "', "\
                    "type: '" + str(item["type"]) + "', "\
                    "author: '" + str(item["author"]) + "', " \
                    "title: '" + str(item["title"]) + "', "\
                    "date_time: '" + str(item["date_time"]) + "', " \
                    "tags: '" + str(item["tags"]) + "', " \
                    "views: '" + str(item["views"]) + "', "\
                    "answers: '" + str(item["answers"]) + "', " \
                    "resolve: '" + str(item["resolve"]) + "', " \
                    "upvotes: '" + str(item["upvotes"]) + "', "\
                    "text: '" + str(item["text"]) + "' }\n"
            except UnicodeEncodeError:
                print("Unicode Encode Exception!")
        out_string += " ]\n\n"
        return out_string

    # For each thread scraped, append it to the json file.
    def insert_items_into_file(self, threads):
        try:
            self.out_file = open("scnscraper/abap.json", "a")  # open in append mode
            # Convert into string and write to the file.
            self.out_file.write(self.threads_to_str(threads))
            self.out_file.close()
        except:
            # NOTE(review): bare except swallows everything (even
            # KeyboardInterrupt) — consider narrowing to OSError.
            print('Exception in writing file')
            self.out_file.close()

    # Read the web page index (creating the file with index 2 if absent).
    def read_index_from_file(self):
        if os.path.exists('scnscraper/index.txt'):
            with open('scnscraper/index.txt') as f:
                index = int(f.readline())
                f.close()  # redundant inside `with`
        else:
            f = open('scnscraper/index.txt', 'w')
            index = 2
            f.write(str(index))
            f.close()
        return index

    # Write the web page index.
    def write_index_into_file(self, i):
        f = open('scnscraper/index.txt', 'w')
        f.write(str(i))
        f.close()

    # Convert the content of the json file into a new db.
    def from_json_to_db(self):
        thread = ''
        db = Base("scnscraper/abap.pydb", save_to_file=True)
        # Create a new base with field names, discarding existing data.
        db.create('url', 'uid', 'type', 'author', 'title', 'date_time',
                  'tags', 'views', 'answers', 'resolve', 'upvotes', 'text',
                  mode='override')
        i = 0  # NOTE(review): unused
        # NOTE(review): 'scnsraper' looks like a typo for 'scnscraper' — confirm.
        with open('scnsraper/threads.json', 'r') as file:
            for line in file:
                if(line.endswith(" }\n")):
                    # Closing line of a record: parse the accumulated text.
                    thread += line
                    tokens = re.search(
                        r"url:\s'(.*?)',\suid:\s'(.*?)',\stype:\s'(.*?)',\sauthor:\s'(.*?)',\stitle:\s'(.*?)',\sdate_time:\s'(.*?)',\stags:\s'(.*?)',\sviews:\s'(.*?)',\sanswers:\s'(.*?)',\sresolve:\s'(.*?)',\supvotes:\s'(.*?)', text:\s'((.|\n)*)'\s}",
                        str(thread))
                    if tokens is not None:
                        db.insert(url=tokens.group(1), uid=tokens.group(2),
                                  type=tokens.group(3), author=tokens.group(4),
                                  title=tokens.group(5),
                                  date_time=tokens.group(6),
                                  tags=tokens.group(7), views=tokens.group(8),
                                  answers=tokens.group(9),
                                  resolve=tokens.group(10),
                                  upvotes=tokens.group(11),
                                  text=tokens.group(12))
                        db.commit()
                        print('\n--------------------------------------------\n')
                    thread = ''
                if(line.startswith(" ]")):
                    # Page separator produced by the scraper.
                    print("new page")
                    thread = ''
                if(line.endswith('\n') and (not line.startswith(" ]\n\n")) and (not line.endswith(" }\n"))):
                    thread += line

    # Print summary statistics about the scraped question threads.
    def state_extraction():
        db = Base("scnscraper/abap.pydb")
        if db.exists():
            db.open()
        record = db(type="Question")
        print("# discussion scraped: " + str(record.__len__()))
        print("Answered: " + str(db(resolve="Answered.").__len__()))
        print("Answered with solution: " + str(db(resolve="solution").__len__()))
        print("Not Answered: " + str(db(resolve="Not Answered.").__len__()))
        print("Assumed Answered: " + str(db(resolve="Assumed Answered.").__len__()))
    # Legacy pre-decorator staticmethod registration.
    state_extraction = staticmethod(state_extraction)
class DataStoring():
    """Persists scraped SCN threads to a JSON dump file and a pydblite base.

    All paths are relative ('scnscraper/...'), so the working directory
    matters. Records are SapItem-like mappings produced by the scraper.

    Fixes in this revision: the bare ``except:`` in
    insert_items_into_file was narrowed to OSError; the misspelled input
    path 'scnsraper/threads.json' in from_json_to_db was corrected; the
    unused counter ``i`` was dropped; state_extraction now uses the
    @staticmethod decorator; file handles use ``with`` where possible.
    """

    def __init__(self):
        """Touch the JSON dump file and open (or create) the database."""
        # Opening in append mode creates the file if missing; only that
        # side effect is wanted, so close immediately.
        self.out_file = open("scnscraper/abap.json", "a")
        self.out_file.close()
        self.db = Base("scnscraper/abap.pydb")
        if self.db.exists():
            self.db.open()
        else:
            self.db.create('url', 'uid', 'type', 'author', 'title',
                           'date_time', 'tags', 'views', 'answers',
                           'resolve', 'upvotes', 'text')

    def insert_items_into_db(self, threads):
        """Insert each scraped thread into the db, then commit once."""
        for item in threads:
            try:
                # All values are coerced to str before insertion.
                self.db.insert(url=str(item["url"]), uid=str(item["uid"]),
                               type=str(item["type"]),
                               author=str(item["author"]),
                               title=str(item["title"]),
                               date_time=str(item["date_time"]),
                               tags=str(item["tags"]),
                               views=str(item["views"]),
                               answers=str(item["answers"]),
                               resolve=str(item["resolve"]),
                               upvotes=str(item["upvotes"]),
                               text=str(item["text"]))
            except UnicodeEncodeError:
                # Best-effort: skip the offending record, keep going.
                print("Unicode Encode Exception!")
        # Save changes on disk.
        self.db.commit()

    def threads_to_str(self, threads):
        """Render the scraped threads as the dump-file text block.

        Returns "" when there are no threads; otherwise
        "[ " + one "{ key: 'value', ... }" line per thread + " ]\\n\\n".
        """
        out_string = "[ "
        if len(threads) == 0:
            return ""
        # Field order must match the regex in from_json_to_db.
        field_names = ('url', 'uid', 'type', 'author', 'title', 'date_time',
                       'tags', 'views', 'answers', 'resolve', 'upvotes')
        for item in threads:
            try:
                parts = [name + ": '" + str(item[name]) + "'"
                         for name in field_names]
                out_string += ("{ " + ", ".join(parts) +
                               ", text: '" + str(item["text"]) + "' }\n")
            except UnicodeEncodeError:
                # Best-effort: drop the offending record.
                print("Unicode Encode Exception!")
        out_string += " ]\n\n"
        return out_string

    def insert_items_into_file(self, threads):
        """Append the rendered threads to the JSON dump file."""
        try:
            self.out_file = open("scnscraper/abap.json", "a")  # append mode
            self.out_file.write(self.threads_to_str(threads))
            self.out_file.close()
        except OSError:
            # FIX: was a bare `except:` that swallowed every exception.
            print('Exception in writing file')
            self.out_file.close()

    def read_index_from_file(self):
        """Return the saved web page index, creating it (as 2) if absent."""
        if os.path.exists('scnscraper/index.txt'):
            with open('scnscraper/index.txt') as f:
                index = int(f.readline())
        else:
            index = 2
            with open('scnscraper/index.txt', 'w') as f:
                f.write(str(index))
        return index

    def write_index_into_file(self, i):
        """Persist the web page index."""
        with open('scnscraper/index.txt', 'w') as f:
            f.write(str(i))

    def from_json_to_db(self):
        """Rebuild the pydblite base from the threads JSON dump.

        FIX: the input path was misspelled 'scnsraper/threads.json'.
        """
        thread = ''
        db = Base("scnscraper/abap.pydb", save_to_file=True)
        # Create a new base with field names, discarding existing data.
        db.create('url', 'uid', 'type', 'author', 'title', 'date_time',
                  'tags', 'views', 'answers', 'resolve', 'upvotes', 'text',
                  mode='override')
        with open('scnscraper/threads.json', 'r') as file:
            for line in file:
                if line.endswith(" }\n"):
                    # Closing line of a record: parse the accumulated text.
                    thread += line
                    tokens = re.search(
                        r"url:\s'(.*?)',\suid:\s'(.*?)',\stype:\s'(.*?)',\sauthor:\s'(.*?)',\stitle:\s'(.*?)',\sdate_time:\s'(.*?)',\stags:\s'(.*?)',\sviews:\s'(.*?)',\sanswers:\s'(.*?)',\sresolve:\s'(.*?)',\supvotes:\s'(.*?)', text:\s'((.|\n)*)'\s}",
                        str(thread))
                    if tokens is not None:
                        db.insert(url=tokens.group(1), uid=tokens.group(2),
                                  type=tokens.group(3), author=tokens.group(4),
                                  title=tokens.group(5),
                                  date_time=tokens.group(6),
                                  tags=tokens.group(7), views=tokens.group(8),
                                  answers=tokens.group(9),
                                  resolve=tokens.group(10),
                                  upvotes=tokens.group(11),
                                  text=tokens.group(12))
                        db.commit()
                        print('\n--------------------------------------------\n')
                    thread = ''
                if line.startswith(" ]"):
                    # Page separator produced by the scraper.
                    print("new page")
                    thread = ''
                if (line.endswith('\n') and not line.startswith(" ]\n\n")
                        and not line.endswith(" }\n")):
                    thread += line

    @staticmethod
    def state_extraction():
        """Print summary statistics about the scraped question threads."""
        db = Base("scnscraper/abap.pydb")
        if db.exists():
            db.open()
        record = db(type="Question")
        print("# discussion scraped: " + str(len(record)))
        print("Answered: " + str(len(db(resolve="Answered."))))
        print("Answered with solution: " + str(len(db(resolve="solution"))))
        print("Not Answered: " + str(len(db(resolve="Not Answered."))))
        print("Assumed Answered: " + str(len(db(resolve="Assumed Answered."))))