def setup():
    commit('''CREATE TABLE unsubs(id INT NOT NULL AUTO_INCREMENT, \
        hash VARCHAR(8), \
        url VARCHAR(1000), \
        email VARCHAR(150), \
        PRIMARY KEY (id))''')
    commit('''CREATE TABLE readmail(id INT NOT NULL AUTO_INCREMENT, \
        email INT, \
        PRIMARY KEY (id))''')
    commit('''CREATE TABLE usercount(id INT NOT NULL AUTO_INCREMENT, \
        another INT, \
        PRIMARY KEY (id))''')
    commit('''CREATE TABLE analytics(id INT NOT NULL AUTO_INCREMENT, \
        email VARCHAR(150), \
        url VARCHAR(1000), \
        success INT, \
        PRIMARY KEY (id))''')
    commit('''CREATE TABLE anonymousanalytics(id INT NOT NULL AUTO_INCREMENT, \
        emailhash VARCHAR(64), \
        unsubhash VARCHAR(8), \
        success INT, \
        stamp DATETIME, \
        PRIMARY KEY (id))''')
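# The functions in this listing rely on module-level commit()/fetch() wrappers
# that are not shown here. The sketch below is one minimal, hypothetical way
# such wrappers could look, assuming a MySQL backend (suggested by the
# AUTO_INCREMENT columns and %s placeholders); the connection settings are
# placeholders, not values from the original project.
import mysql.connector

_conn = mysql.connector.connect(host='localhost', user='user',
                                password='password', database='unsubdb')

def commit(query, params=None):
    # Execute a statement and persist it immediately.
    # Callers sometimes pass a bare value instead of a tuple, so normalize it.
    if params is not None and not isinstance(params, (tuple, list, dict)):
        params = (params,)
    cur = _conn.cursor()
    cur.execute(query, params)
    _conn.commit()
    cur.close()

def fetch(query, params=None):
    # Execute a query and return all result rows as a list of tuples.
    if params is not None and not isinstance(params, (tuple, list, dict)):
        params = (params,)
    cur = _conn.cursor()
    cur.execute(query, params)
    rows = cur.fetchall()
    cur.close()
    return rows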
def deleteAllUnsubs():
    results = fetch('select hash from unsubs')
    log.info('deleting all unsubs with # unsubs ' + str(len(results)))
    if len(results) < 15:
        for r in results:
            hh = r[0]
            commit('delete from unsubs where hash=%s', hh)
def handleDB(it):
    for jj in range(10):
        ll, origSet = getFive()
        if not ll:
            if it > 2:
                log.info('empty turning off')
                #time.sleep(120)  # wait for master to finish
                turnOff()
            return
        browser = selenium.getBrowser()
        log.info(str(len(ll)) + str(ll) + str(jj))
        try:
            i = 0
            for uns in ll:
                if i > 6:
                    break
                i += 1
                log.info('hashh', uns.hashh)
                res = unsubscribe(uns, browser)
                if not res:
                    log.info('failed confirmation', uns.hashh)
                    addEmailToSqlAnalytics(uns, False)
                else:
                    log.info('confirmed unsub')
                    commit('insert into usercount (another) values (1)')
                    addEmailToSqlAnalytics(uns, True)
                #browser = selenium.refreshBrowser(browser)
        except Exception as e:
            log.warn(e)
        log.info('deleting from unsubs ' + str(origSet))
        for ss in origSet:
            commit('delete from unsubs where hash=%s', ss)
        selenium.closeBrowser(browser)
def post(self, ids):
    title = self.get_argument('title')
    fl = self.get_argument('fl')
    tag = self.get_argument('tag')
    content = self.get_argument('comment')
    # use placeholders so user input is not interpolated into the SQL string
    sql.cur.execute(
        "update blog set title=?, fl=?, tag=?, content=? where id=?",
        (title, fl, tag, content, ids))
    sql.commit()
    self.redirect('/test/%s' % ids)
def deleteReadEmail17days():
    results = fetch('select email from readmail')
    print results[-10:]
    start = 6000
    for r in results:
        commit('delete from readmail where email=%s', str(start))
        start += 1
    results = fetch('select email from readmail')
    print results[-10:]
def fill_proxypool():
    page = get_webpage()  # crawl the xicidaili proxy site; keep requests at least 15 minutes apart
    # page = test.read('西刺代理.html', 'utf-8')  # for testing: read a local copy to avoid being banned for frequent crawling
    counter = 1
    for item in parse_page(page):
        print('Updating: {0:^2} - {1:<16}'.format(counter, item[0]), end='\t')
        if sql.insert(item[0], item[1], item[2], item[3], item[4]):
            counter += 1
        else:
            print('duplicate insert')
    sql.commit()
    print('\nUpdated %d proxy IP entries this run\n' % (counter - 1))
def deleteLastReads():
    results = fetch('select email from readmail')
    print results[-10:]
    total = len(results)
    for r in results[total-150:]:
        commit('delete from readmail where email=%s', r[0])
    results = fetch('select email from readmail')
    print results[-10:]

#deleteLastReads()
#deleteReadEmail17days()
def main(argv):
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('files', nargs='+')
    parser.add_argument('--standortid', type=int)
    parser.add_argument('--pages', type=int, default=1)
    args = parser.parse_args(argv)

    for full_fn in args.files:
        fn = ntpath.basename(full_fn)
        # look up the file id
        file_id = sql.fetch('ufiles', fn, id_key='name')
        if not file_id:
            print "File '%s' not found in DB, skipping" % fn
        else:
            data = []
            headers = []
            for page in xrange(args.pages):
                data, headers = p_xls.read_xls_data(full_fn, page)
                lines = 0  # administration: number of successfully inserted lines
                for row in data:
                    standortid = -1
                    if hasattr(row, 'StandortID'):
                        standortid = getattr(row, 'StandortID')
                    elif args.standortid != None:
                        standortid = args.standortid
                    else:
                        sys.stderr.write('No StandortID found!')
                        exit()
                    rs = sql.fetch_all('temps', {
                        'datum': getattr(row, 'Datum'),
                        'location_id': standortid
                    })
                    if rs != False:  # and len(rs) == 1:
                        for i in xrange(len(rs)):
                            if sql.insert('ufiletemps', {
                                    'ufile_id': file_id['id'],
                                    'temp_id': rs[i]['id']
                            }):
                                lines += 1
                            else:
                                print "%d,%d" % (file_id['id'], rs[i]['id'])
                print "Inserted %d/%d of page %d" % (lines, len(data), page)
        sql.commit()  # after each file
    return None
def post(self):
    # timestamp-based id with a random five-digit suffix
    ids = int(time.strftime("%Y%m%d%H%M%S") + str(random.randrange(10000, 99999)))
    name = 'admin'
    email = '*****@*****.**'
    title = self.get_argument('title')
    fl = self.get_argument('fl')
    tag = self.get_argument('tag')
    tms = time.strftime('%Y-%m-%d %H:%M')
    content = self.get_argument('comment')
    res_list = [ids, name, email, title, fl, tag, tms, content]
    sql.cur.execute("insert into blog values (?,?,?,?,?,?,?,?)", res_list)
    sql.commit()
    self.redirect('/test')
def ApiList(jmgr, os_target, sql, args):
    sql.connect_table(tables['api_type'])
    sql.connect_table(tables['api_list'])

    api_types = os_target.get_api_types()
    apis = os_target.get_apis()

    sql.delete_record(tables['api_type'])
    for type_id, type_name in api_types.items():
        values = dict()
        values['type'] = type_id
        values['name'] = type_name
        sql.append_record(tables['api_type'], values)

    sql.delete_record(tables['api_list'])
    for values in apis:
        sql.append_record(tables['api_list'], values)

    sql.commit()
def anonymousAnalytics(email, unsubhash, success=False):
    digest = hashEmail(email)
    now = str(datetime.datetime.now())
    results = fetch(
        'select unsubhash, success from anonymousanalytics where unsubhash=%s',
        (unsubhash,))
    success = int(success)
    if results:
        if int(results[0][1]) == 0 and success:
            commit(
                'update anonymousanalytics set success=1 where unsubhash=%s',
                (unsubhash,))
        else:
            log.info('unsub hash is still failing, do not update analytics',
                     unsubhash)
    else:
        commit(
            'insert into anonymousanalytics (emailhash, unsubhash, success, stamp) values (%s, %s, %s, %s)',
            (digest, unsubhash, str(success), now))
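# hashEmail() is referenced by anonymousAnalytics() but not defined in this
# listing. A minimal sketch, assuming it returns a SHA-256 hex digest (64 hex
# characters, which matches the emailhash VARCHAR(64) column created in
# setup()); the strip/lower normalization is an assumption.
import hashlib

def hashEmail(email):
    normalized = email.strip().lower()
    return hashlib.sha256(normalized.encode('utf-8')).hexdigest()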
def append_api_list(sql, type, id, name):
    sql.connect_table(tables['api_list'])
    retry = True
    while retry:
        res = sql.search_record(
            tables['api_list'],
            'type=' + Table.stringify(type) + ' and id=' + Table.stringify(id),
            ['name'])
        if len(res) > 0 and res[0][0]:
            if res[0][0] != name:
                raise Error('duplicate key value (' + Table.stringify(type) +
                            ',' + Table.stringify(id) + ')')
            return
        values = dict()
        values['type'] = type
        values['id'] = id
        values['name'] = name
        retry = False
        try:
            sql.append_record(tables['api_list'], values)
            sql.commit()
        except:
            retry = True
import sql
import pickle
import pprint

content = open('result_array.pkl', 'rb')
data = pickle.load(content)

#sql.create_Table()

def analyze_link(i):
    # build the full object-storage URL and normalize backslashes in the path
    link = 'https://ctd-1257758577.cos.ap-guangzhou.myqcloud.com/' + i['addr']
    link = link.replace('\\', '/')
    return link

for i in data:
    try:
        sql.insertValue(i['name'], i['author'], i['age'], analyze_link(i))
    except Exception as e:
        print(str(e))
        print(i)
        input('error......')

sql.commit()
def fullAnalytics(email, url, success):
    s = int(success)
    commit('insert into analytics (email, url, success) values (%s, %s, %s)',
           (email, url, s))
def parse_posts_urls(url):
    request = session.get(url, headers=headers)
    if request.status_code == 200:
        print('Get page = ', url)
        soup = bs(request.content, 'html.parser')
        posts_list = soup.find_all('a', 'story__title-link',
                                   'story__title-link_visited')
        for post in posts_list:
            url = post['href']
            title_name = post.text
            id = get_id_url(url)
            category = soup.title.text
            post = PostUrl(title=title_name, pika_id=id, url=url,
                           category=category)
            print('parse complete = ', url)
    else:
        print("Page does not exist")


def get_id_url(url):
    result = re.findall(r'_(\d+)$', url)[0]
    return result


if __name__ == '__main__':
    for url in url_list:
        sleep(1)
        parse_posts_urls(url)
    commit()
attach_text = p.sub('', attach_text)
t = re.sub('<[^>]*>', ' ', text)
t = set([i[1:] for i in t.split() if i.startswith("#")])
for tag in t:
    add_tag(tag)
    tags.append(tag)
detect_tags(tags, text)
for tag in tags:
    if tag in all_tags:
        sql.upsert('blog_post_tags', {'post_id': id, 'tag_id': all_tags[tag]})
act, pid = sql.upsert('blog_post', {'id': id},
                      {'datetime': date, 'content': text,
                       'deleted': False, 'showAds': False})
if 'insert' == act:
    if not dryRun:
        text = re.sub('<[^>]*>', ' ', text)
        post_twitter.send(text, attach_text,
                          "http://mechanicalbear.ru/" + str(id))
        post_facebook.send(id, text, image, video, attach_text)
        post_delicious.send(id, text + ' ' + attach_text, tags)
        if image > 0:
            post_tumblr.send(id, image, date)
            post_flickr.send(id, image, text)
#else:
#    break

sql.commit()
sql.close()
def wipe():
    commit('''drop table unsubs''')
    commit('''drop table readmail''')
    commit('''drop table usercount''')
    commit('''drop table emailhashespositive''')
    commit('''drop table emailhashestotal''')
    commit('''drop table analytics''')
    setup()
def main(argv):
    argparser = argparse.ArgumentParser(description='')
    argparser.add_argument('files', nargs='+')
    args = argparser.parse_args(argv)

    for fn in args.files:
        if isdir(fn):
            continue

        # read in file
        print 'opening %s' % fn
        f = open(fn, 'r')
        lines_raw = f.readlines()

        raw_id = None
        if sql.insert('raws', {'data': ''.join(lines_raw),
                               'filename': basename(fn)}):
            raw_id = sql.lastrowid()
            progress("Added %d to raws" % raw_id)

        # preprocess
        lines = []
        try:
            line_nr = 0
            for line in lines_raw:
                line_nr += 1
                line = line.rstrip('\r\n')
                line = re.split(r'\t|;', line)
                line = preprocess_line(line)
                lines.append(line)
        except:
            print "%d: %s" % (line_nr, line)
            raise

        # add a dummy plant/culture/subspecies, just in case samples can't be
        # connected just yet.
        if not sql.exists('cultures', 1):
            sql.insert('cultures', {'id': DUMMY_CULTURE_ID, 'name': 'placeholder'})
        if not sql.exists('plants', 1):
            sql.insert('plants', {'id': DUMMY_PLANT_ID, 'name': 'placeholder',
                                  'culture_id': DUMMY_CULTURE_ID})
        if not sql.exists('subspecies', 1):
            sql.insert('subspecies', {'id': DUMMY_SUBSPECIES_ID,
                                      'species_id': SPECIES_ID})

        # some lines need to be sent back to LIMS, this is where we store them
        lims_lines = []

        # save!
        line_nr = 0
        try:
            for line in lines:
                line_nr += 1
                program_id = get_program_id(line)

                if is_sample_plant(line):
                    line[8] = int(line[8])  # plant_id is still str
                    #save_sample_plant(sample_id=line[7], plant_id=line[8], date=date)
                    # skipped because made redundant when preloading all samples/plants
                    lims_lines.append("\t".join([str(item) for item in line]))
                #elif program_id == 1 and is_freshweight_between(line):
                #    lims_lines.append("\t".join([str(item) for item in line]))
                else:
                    phenotype = format_line(line)  # create a readable program

                    # add the actual phenotype
                    phenotype_id = None
                    if sql.insert('phenotypes', {
                            'version': phenotype['version'],
                            'object': phenotype['object'],
                            'program_id': phenotype['program_id'],
                            'date': phenotype['date'],
                            'time': phenotype['time'],
                            'entity_id': phenotype['entity_id'],
                            'value_id': phenotype['value_id'],
                            'number': phenotype['number']}):
                        phenotype_id = sql.lastrowid()
                        progress('Added %d to phenotype' % phenotype_id)

                    # if plant, add it to plants, otherwise to samples
                    if ora_sql.is_plant(phenotype['sample_id']) or ora_sql.was_plant(phenotype['sample_id']):
                        sql.insert('phenotype_plants', {
                            'phenotype_id': phenotype_id,
                            'plant_id': phenotype['sample_id']})
                    elif ora_sql.is_sample(phenotype['sample_id']):
                        sql.insert('phenotype_samples', {
                            'phenotype_id': phenotype_id,
                            'sample_id': phenotype['sample_id']})
                    elif ora_sql.is_aliquot(phenotype['sample_id']):
                        sql.insert('phenotype_aliquots', {
                            'phenotype_id': phenotype_id,
                            'aliquot_id': phenotype['sample_id']})
                    else:
                        print "%s NOT found!!" % phenotype['sample_id']

                    sql.insert('phenotype_raws', {
                        'phenotype_id': phenotype_id,
                        'raw_id': raw_id,
                        'line_nr': line_nr})

                    if program_id > 1:
                        sql.insert('phenotype_bbches', {
                            'phenotype_id': phenotype_id,
                            'bbch_id': phenotype['bbch_id']})
        except:
            progress("%d: %s" % (line_nr, line))
            raise

        # save the current saved lines for LIMS
        write_lims_lines(lims_lines, fn)

        sql.commit()
def post(self):
    ids = self.get_argument('id_del')
    # use a placeholder so the id is not interpolated into the SQL string
    sql.cur.execute("delete from blog where id=?", (ids,))
    sql.commit()
    self.redirect('/xml')
def main(argv):
    argparser = argparse.ArgumentParser(description='')
    argparser.add_argument('files', nargs='+')
    args = argparser.parse_args(argv)

    for fn in args.files:
        if isdir(fn):
            continue

        # read in file
        print 'opening %s' % fn
        f = open(fn, 'r')
        lines_raw = f.readlines()

        raw_id = sql.fetch_all('raws', {'filename': basename(fn)})[0]['id']
        progress("Found %d of %s" % (raw_id, fn))

        # preprocess
        lines = []
        try:
            line_nr = 0
            for line in lines_raw:
                line_nr += 1
                line = line.rstrip('\r\n')
                line = re.split(r'\t|;', line)
                line = preprocess_line(line)
                lines.append(line)
        except:
            print "%d: %s" % (line_nr, line)
            raise

        # save!
        line_nr = 0
        try:
            for line in lines:
                line_nr += 1
                phenotype = format_line(line)  # create a readable program

                # add the actual phenotype
                phenotype_id = None
                phenotype_q_params = dict(phenotype.items() + {
                    'entity_id': -12345,
                    'filename': basename(fn)
                }.items())
                del phenotype_q_params['attribute']
                del phenotype_q_params['value']
                phenotype_q_params['pp.plant_id'] = phenotype_q_params.pop('sample_id')

                q = """
                    select phenotypes.id
                    from phenotypes
                    join phenotype_raws pr on pr.phenotype_id = phenotypes.id
                    join raws r on r.id = pr.raw_id
                    left join phenotype_plants pp on pp.phenotype_id = phenotypes.id
                    where
                """
                q += ' and '.join(['%s=%s' % (k, '%s') for k in phenotype_q_params.keys()])

                sql_phenotype = sql.fetch_all(None, phenotype_q_params, q)
                if len(sql_phenotype) == 1:
                    if sql.update('phenotypes', {'id': sql_phenotype[0]['id']},
                                  {'entity_id': phenotype['entity_id']}):
                        phenotype_id = sql.lastrowid()
                        progress('Added %d to phenotype' % phenotype_id)
        except:
            progress("%d: %s" % (line_nr, line))
            raise

        sql.commit()