# encoding: utf-8
# One-off import script: registers every file found under the local "话"
# data directory as a 'book' record whose link points at the mirrored copy
# in the cloud bucket.
import library
import fastclean

# Raw string: the original literal relied on invalid escape sequences
# (\G, \C, \D, ...), which newer Python versions warn about. The value is
# unchanged.
api = r'E:\GFEC - Global Free Education Center\CTCdata - Chinese Traditional Classical Database\Data\话'
url = 'https://ctd-1257758577.cos.ap-guangzhou.myqcloud.com/data/%E8%AF%9D/'

# First id to assign; each file gets the next consecutive id.
cid = 54766 + 1

library = library.library()
container = fastclean.data()
container.load_data([api])

for cid, path in enumerate(container.data, start=cid):
    # File names are expected to look like "<title>-<age>-<author>.<ext>".
    filename = path.split('\\')[-1]
    parts = filename.split('-')
    title = parts[0]
    age = parts[1]
    author = parts[2].split('.')[0]
    # NOTE(review): the file name is appended as-is (not percent-encoded),
    # exactly as the original did - confirm the consumer expects that.
    link = url + filename

    record = [cid, title, author, age, link]
    print(record)
    library.addMethod(record, 'book')

library.save_change()
# Tail of a one-off import script: reads every row of the `ci` table and
# registers each as a 'poem' record. `c`, `datas`, `cid_begin` and `library`
# are defined earlier in the script (outside this section).
age = '宋词'

for row in c.execute('''select * from ci'''):
    # Column 2 is used as the author; the title is column 1 suffixed with
    # the author.
    datas.append({
        'title': row[1] + '-' + row[2],
        'author': row[2],
        'age': age,
        'content': row[3]
    })

# Assign consecutive ids starting at cid_begin, one per collected entry.
# NOTE(review): the original layout was collapsed onto one line; the
# progress print is assumed to belong inside the loop - confirm.
count = 0
for count, entry in enumerate(datas, start=1):
    record = [
        cid_begin + count - 1,
        entry['title'],
        entry['author'],
        entry['age'],
        entry['content']
    ]
    library.addMethod(record, 'poem')
    print('finished: ' + str(count))

print('done!')
library.save_change()
# The run recorded by the original author added 21592 new items.
# encoding: utf-8
# One-off script: registers a single 'download' record and saves the change.
import library

library = library.library()

# Fields of the record, in the order passed to addMethod.
entry_id = 54764
title = '金庸作品全集'
author = '金庸'
age = '现代'
info = '金庸作品全集,明河社出版'
link = (
    'https://ctd-1257758577.cos.ap-guangzhou.myqcloud.com/data/'
    '%E5%85%B6%E4%BB%96/'
    '%E9%87%91%E5%BA%B8%E4%BD%9C%E5%93%81%E5%85%A8%E9%9B%86.txt'
)

library.addMethod([entry_id, title, author, age, info, link], 'download')
library.save_change()
import pprint

# `library` and `fastclean` are used below but were not imported in this
# section; importing them here keeps the script self-contained.
import fastclean
import library

# One-off import script: registers the files under the local "四库"
# directory as 'download' records linking to the mirrored copies.
api = 'https://ctd-1257758577.cos.ap-guangzhou.myqcloud.com/data/%E5%9B%9B%E5%BA%93/'
# Raw string: the original literal relied on invalid escape sequences
# (\G, \C, \D, ...). The value is unchanged.
addr = r'E:\GFEC - Global Free Education Center\CTCdata - Chinese Traditional Classical Database\Data\藏\四库'

library = library.library()
container = fastclean.data()
container.load_data([addr])
container.eliminate_fold()

# The first four entries are skipped.
# NOTE(review): the reason is not visible here - confirm before re-running.
data = container.data[4:]
pprint.pprint(data)
print(len(data))

age = '清代'
# The original defined author = 'unknown' but always wrote '纪昀' into the
# record; the variable now carries the value that is actually stored.
author = '纪昀'
info = '《四库全书》全称《钦定四库全书》,是清代乾隆时期编修的大型丛书。在清高宗乾隆帝的主持下,由纪昀等360多位高官、学者编撰,3800多人抄写,耗时十三年编成。'

# First id to assign; each file gets the next consecutive id.
cid_begin = 50662 + 1

for cid, path in enumerate(data, start=cid_begin):
    parts = path.split('\\')
    filename = parts[-1]
    # Title is the file name without its last four characters (e.g. ".txt").
    title = filename[:-4]
    # The remote layout mirrors "<parent folder>/<file name>".
    url = api + parts[-2] + '/' + filename
    print(title)
    print(url)
    library.addMethod([cid, title, author, age, info, url], 'download')

library.save_change()
# `library` and `fastclean` are used below but were not imported in this
# section; importing them here keeps the script self-contained.
import fastclean
import library

# One-off import script: registers the files under the local "医" directory
# as 'book' records linking to the mirrored copies.
#
# Backslashes are escaped explicitly: the original literal relied on invalid
# escape sequences (\G, \医). A raw string cannot be used because the path
# ends with a backslash. The value is unchanged.
addr = 'E:\\GFEC - Global Free Education Center\\医\\'
api = 'https://ctd-1257758577.cos.ap-guangzhou.myqcloud.com/data/%E5%8C%BB/'

containe = fastclean.data()
containe.load_data([addr])
# NOTE(review): presumably keeps only entries of type 'txt' - confirm
# against fastclean.
containe.eliminate_type_not_have(['txt'])

# A disabled variant (parked in a string literal in the original) collected
# only the files that come before the title '格致余论'; this version
# processes every loaded file.

library = library.library()

# First id to assign; each file gets the next consecutive id.
first_cid = 54069 + 1

for cid, path in enumerate(containe.data, start=first_cid):
    filename = path.split('\\')[-1]
    author = 'unknown'
    age = 'unknown'
    link = api + filename
    # File names look like "<number>-<title>.txt"; drop the 4-character
    # extension from the part after the first dash.
    title = filename.split('-')[1][:-4]
    library.addMethod([cid, title, author, age, link], 'book')

library.save_change()