def remove_dup_fics(self, info):
    print('deduping index: ' + info.Fandom_DB_Path)
    ficDB = FanFicSql(info.Fandom_DB_Path)
    ficDB.FilePath = info.Fandom_DB_Path
    ficDB.delete_duplicate_fics()
    print('deduped index: ' + info.Fandom_DB_Path)
    return True
def add_fandom_links_to_list(self, info):
    print('starting links import, database: ' + info.Fandom_DB_Path)
    ficDB = FanFicSql(info.Fandom_DB_Path)
    ficDB.FilePath = info.Fandom_DB_Path
    ficDB.add_fic_links_to_linkdb()
    print('links imported to database')
    return True
def save_fic_list(self, ficList):
    oDB = FanFicSql(self._Path)
    oDB.FilePath = self._Path
    # ffNetFile = open(self._Path, 'a')
    for item in ficList:
        oDB.save_fic(item)
def update_link_db(self):
    self.load_fandom_info()
    print('update linkdb')
    for info in self.ffnet_list:
        ficDB = FanFicSql(info.Fandom_DB_Path)
        print(info.Fandom_DB_Path)
        ficDB.add_fic_links_to_linkdb()
def is_oldest_fics_in_db(self, info):
    logging.debug('find-cnt')
    oDB = FanFicSql(self._Path)
    oDB.FilePath = self._Path
    logging.debug('DB: ' + self._Path)
    oUrl = FanfictionNetUrlBuilder(info.FandomUrl, "http://", "www.fanfiction.net/")
    sUrl = oUrl.generate_page_url(1)
    logging.debug('surl: ' + sUrl)
    try:
        html = urlopen(sUrl)
    except Exception:
        print('sleep')
        time.sleep(60)
        try:
            html = urlopen(sUrl)
        except Exception:
            logging.critical("html = urlopen(sUrl) failed: " + sUrl)
            print("ERROR")
            return False
    bsObj = BeautifulSoup(html, "html5lib")
    icnt = self.get_fandom_length(bsObj)
    # Jump straight to the last index page; the oldest fics live there.
    sUrl = oUrl.generate_page_url(icnt)
    html = urlopen(sUrl)
    bsObj = BeautifulSoup(html, "html5lib")
    fics = self.get_fic_from_page(bsObj)
    for fic in fics:
        if not oDB.is_fic_in_Db(fic.FFNetID):
            return False
    return True
def find_fandom_fic_cnt(self, ffnet_url):
    logging.debug('find-cnt')
    oDB = FanFicSql(self._Path)
    oDB.FilePath = self._Path
    logging.debug('DB: ' + self._Path)
    oUrl = FanfictionNetUrlBuilder(ffnet_url, "http://", "www.fanfiction.net/")
    fic_cnt = 0
    sUrl = oUrl.generate_page_url(1)
    logging.debug('surl: ' + sUrl)
    try:
        html = urlopen(sUrl)
    except Exception:
        print('sleep')
        time.sleep(60)
        try:
            html = urlopen(sUrl)
        except Exception:
            logging.critical("html = urlopen(sUrl) failed: " + sUrl)
            print("ERROR")
            return fic_cnt
    bsObj = BeautifulSoup(html, "html5lib")
    icnt = self.get_fandom_length(bsObj)
    # Fetch the last index page; every earlier page holds 25 fics.
    sUrl = oUrl.generate_page_url(icnt)
    try:
        html = urlopen(sUrl)
    except Exception:
        print('sleep')
        time.sleep(60)
        try:
            html = urlopen(sUrl)
        except Exception:
            logging.critical("html = urlopen(sUrl) failed: " + sUrl)
            print("ERROR")
            return fic_cnt
    bsObj = BeautifulSoup(html, "html5lib")
    nameList = bsObj.findAll("div", class_='z-list zhover zpointer ')
    last_pg_cnt = len(nameList)
    if icnt == 1:
        return last_pg_cnt
    # (pages - 1) full pages of 25 fics, plus the partial last page.
    return (icnt - 1) * 25 + last_pg_cnt
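# The urlopen/sleep/retry dance above is repeated inline in several methods.
# A minimal sketch of a shared helper the callers could delegate to instead;
# the name _fetch_with_retry and the retry_delay parameter are assumptions,
# not part of the original class. It relies on the same module-level urlopen,
# time, and logging imports the methods above already require.
def _fetch_with_retry(self, sUrl, retry_delay=60):
    # Try once; on failure sleep and retry once, mirroring the inline pattern.
    try:
        return urlopen(sUrl)
    except Exception:
        print('sleep')
        time.sleep(retry_delay)
        try:
            return urlopen(sUrl)
        except Exception:
            logging.critical("urlopen failed: " + sUrl)
            print("ERROR")
            return None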
def create_link_for_fic_file(self, fic_file):
    db = FicFileDb('file.db')
    spath = self.get_FFbrowser_db_path('appdata.db')
    ssql = AppSql()
    ssql.FilePath = spath
    fandoms = ssql.get_fandom_list()
    # Search every fandom DB for the fic; link and stop at the first match.
    for f in fandoms:
        dbPath = self.get_FFbrowser_db_path(f.Fandom_DB_Path)
        fanfic_db = FanFicSql(dbPath)
        if fanfic_db.is_fic_in_Db(fic_file.FFNetID):
            fanfic = fanfic_db.get_fic_by_ffnetID(fic_file.FFNetID)
            fic_link = FanFicDbToDb()
            fic_link.FicFileID = fic_file.FicID
            fic_link.FicId = fanfic.FicID
            fic_link.FanFicArchiveId = fic_file.FFNetID
            fic_link.DBPath = f.Fandom_DB_Path
            db.save_fic_link(fic_link)
            return
    # No fandom DB contained the fic; report it.
    print("")
    print("Fanfic Not found in FicDBs: " + fic_file.FFNetID)
    print("FicID: " + str(fic_file.FicID))
    print("Fic File Path: " + fic_file.FilePath)
    print("--------------------------------------------------")
def test(self):
    # self.create_ficdb_for_fandom_by_id(30)
    # self.reindex_fandom_by_id(30)
    info_id = 16
    fan_info = self.get_fandom_info_by_id(info_id)
    new_fic = FanFic()
    new_fic.Chapters = 1
    new_fic.Published = '0'
    new_fic.FFNetID = '0'
    new_fic.Rating = 'M'
    new_fic.Status = 'C'
    new_fic.Summary = 'Test'
    new_fic.Title = 'Test'
    new_fic.Url = 'http://www.test.com'
    new_fic.Words = '0'
    ficDb = FanFicSql(fan_info.Fandom_DB_Path)
    ficDb.FilePath = fan_info.Fandom_DB_Path
    # ficDb.save_fic(new_fic)
    new_fic.Chapters = '2'
    new_fic.Words = '999'
    new_fic.Status = "Complete"
    new_fic.Summary = 'update works'
    ficDb.update_fic(new_fic)
    return True
def update_index(self, ffnet_url, fandom_name, isXover):
    oDB = FanFicSql(self._Path)
    # ffNetFile = open(self._Path, 'a')
    self._is_xover = isXover
    self._Fandom = fandom_name
    oUrl = FanfictionNetUrlBuilder(ffnet_url, "http://", "www.fanfiction.net/")
    sUrl = oUrl.generate_page_url(1)
    html = urlopen(sUrl)
    bsObj = BeautifulSoup(html, "html5lib")
    icnt = self.get_fandom_length(bsObj)
    icnt2 = 0
    for x in range(icnt):
        i = x + 1
        sUrl = oUrl.generate_page_url(i)
        try:
            html = urlopen(sUrl)
        except Exception:
            time.sleep(60)
            html = urlopen(sUrl)
        bsObj = BeautifulSoup(html, "html5lib")
        # The page count can grow while we crawl; remember the latest value.
        _icnt = self.get_fandom_length(bsObj)
        if _icnt > 0:
            icnt2 = _icnt
        self.get_fic_from_page(bsObj)
        print(str(i))
        time.sleep(5)
    # If new pages appeared mid-crawl, walk the extra pages as well.
    if icnt2 > icnt:
        for a in range(icnt, icnt2):
            ii = a + 1
            sUrl = oUrl.generate_page_url(0, ii)
            html = urlopen(sUrl)
            bsObj = BeautifulSoup(html, "html5lib")
            self.get_fic_from_page(bsObj)
            print(str(ii))
            time.sleep(5)
def get_db_fic_cnt(self):
    oDB = FanFicSql(self._Path)
    oDB.FilePath = self._Path
    return oDB.get_fanfic_cnt()
def reindex_archive(self, ffnet_url, fandom_name, isXover, start_page_num):
    logging.debug('')
    self._is_xover = isXover
    self._Fandom = fandom_name
    oDB = FanFicSql(self._Path)
    oDB.FilePath = self._Path
    logging.debug('DB: ' + self._Path)
    logging.debug('lastDate: ' + str(0))
    oUrl = FanfictionNetUrlBuilder(ffnet_url, "http://", "www.fanfiction.net/")
    fic_cnt = 0
    sUrl = oUrl.generate_page_url(1)
    logging.debug('surl: ' + sUrl)
    html = urlopen(sUrl)
    bsObj = BeautifulSoup(html, "html5lib")
    if not isXover:
        self._Fandom = self.get_fandom(bsObj)
        print('fandom: ' + self._Fandom)
    logging.debug('Fandom: ' + self._Fandom)
    icnt = self.get_fandom_length(bsObj)
    logging.debug('Length: ' + str(icnt))
    icnt2 = 0
    for x in range(start_page_num, icnt):
        sUrl = oUrl.generate_page_url(x)
        logging.debug('surl: ' + sUrl)
        try:
            html = urlopen(sUrl)
        except Exception:
            print('sleep')
            time.sleep(60)
            try:
                html = urlopen(sUrl)
            except Exception:
                logging.critical("html = urlopen(sUrl) failed: " + sUrl)
                print("ERROR")
                return fic_cnt
        bsObj = BeautifulSoup(html, "html5lib")
        # Default to 0 so a parse failure cannot leave _icnt unbound.
        _icnt = 0
        try:
            _icnt = self.get_fandom_length(bsObj)
        except Exception:
            pass
        logging.debug('Length: ' + str(_icnt))
        if _icnt > 0:
            icnt2 = _icnt
        fic_list = self.get_fic_from_page(bsObj)
        fic_cnt += len(fic_list)
        self.save_fic_list(fic_list)
        logging.debug('fic count: ' + str(fic_cnt))
        print('page_num: ' + str(x))
        time.sleep(5)
    # If the archive grew while indexing, pick up the newly added pages.
    if icnt2 > icnt:
        for a in range(icnt, icnt2):
            sUrl = oUrl.generate_page_url(a)
            html = urlopen(sUrl)
            bsObj = BeautifulSoup(html, "html5lib")
            fic_list = self.get_fic_from_page(bsObj)
            fic_cnt += len(fic_list)
            self.save_fic_list(fic_list)
            print('page_num: ' + str(a))
            time.sleep(5)
    return fic_cnt
def dedup_linkList_db(self):
    self.load_fandom_info()
    print('dedup linkdb')
    ficDB = FanFicSql(self.ffnet_list[0].Fandom_DB_Path)
    ficDB.delete_dup_ficlinks()
def create_ficlink_db(self):
    print('create fic list db')
    ficDB = FanFicSql('fic.db')
    ficDB.create_link_list_db()
    print('created fic list db')
    return True
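# A sketch of the expected call order for the link-db workflow above, for
# illustration only. 'manager' stands in for an instance of this class; the
# name is an assumption, not taken from the original code.
#
#   manager.create_ficlink_db()   # create fic.db with the link-list schema
#   manager.update_link_db()      # import links from every fandom DB
#   manager.dedup_linkList_db()   # drop duplicate links afterwards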
def insert_to_db(self, path):
    # Stub: only opens the database; no insert is performed yet.
    oDB = FanFicSql(path)