def save(self, best=False): print 'Model.save()' if self.model == None: return False name = '%s_%s'%(self.project.id, self.project.type) prefix = 'best' if best else 'latest' revision = 0 if self.offline: name = '%s_offline'%(name) elif best: revision = DB.getRevision( self.id ) revision = (revision+1)%10 prefix = '%s_%d'%(prefix, revision) # construct the path to the network and weights path = '%s/%s_%s'%(Paths.Models, prefix, name) j_path = '%s.json'%(path) w_path = '%s_weights.h5'%(path) j_path = j_path.lower() w_path = w_path.lower() print 'saving model...' json_string = self.model.to_json() open(j_path, 'w').write(json_string) self.model.save_weights(w_path, overwrite=True) if not self.offline: DB.finishSaveModel( self.project.id, revision ) return True
def setUpClass(cls):
    """One-time test fixture: point the app at the test database, wipe all
    user-rooted data, and seed three users, each with one distribution and
    one stable distribution version.
    """
    app.config['DATABASE'] = config.connstr
    app.config['TESTING'] = True
    DB.init_pool(app.config['DATABASE'])
    with DB(False) as (conn, cursor):
        # this pretty much cascades to everything, except animals
        cursor.execute("TRUNCATE users CASCADE")
        # create a single user/distribution/version
        # (currval() picks up the id of the row just inserted)
        cursor.execute("INSERT INTO users (user_name, full_name) VALUES ('test_user', 'test user full name')")
        cursor.execute("INSERT INTO distributions (user_id, dist_name) VALUES (currval('users_id_seq'), 'testdistr')")
        cursor.execute("INSERT INTO distribution_versions (dist_id, dist_version, dist_date, dist_status) VALUES (currval('distributions_id_seq'), '1.2.3', '2014-01-02 03:04:05', 'stable')")
        # second and third users follow the same pattern
        cursor.execute("INSERT INTO users (user_name, full_name) VALUES ('test_user2', 'test user full name')")
        cursor.execute("INSERT INTO distributions (user_id, dist_name) VALUES (currval('users_id_seq'), 'testdistr2')")
        cursor.execute("INSERT INTO distribution_versions (dist_id, dist_version, dist_date, dist_status) VALUES (currval('distributions_id_seq'), '1.2.4', '2014-01-02 03:04:05', 'stable')")
        cursor.execute("INSERT INTO users (user_name, full_name) VALUES ('test_user3', 'test user full name')")
        cursor.execute("INSERT INTO distributions (user_id, dist_name) VALUES (currval('users_id_seq'), 'testdistr3')")
        cursor.execute("INSERT INTO distribution_versions (dist_id, dist_version, dist_date, dist_status) VALUES (currval('distributions_id_seq'), '1.2.5', '2014-01-02 03:04:05', 'stable')")
        conn.commit()
class FileManager(object):
    """Thin wrapper around DB for bulk insert/delete of song rows."""

    def __init__(self):
        # Shared DB handle used by all operations.
        self.db = DB()

    def add_files(self, files):
        """Insert one row per file, with metadata, via a single multi-row
        INSERT statement.

        NOTE(review): values are spliced into the SQL with str.format and
        double quotes -- metadata containing quotes will break the statement,
        and this is vulnerable to SQL injection.  Should use parameterized
        queries if self.db.execute supports them.
        """
        # sql = 'insert into songs (name, path) values '
        sql = 'insert into songs (name, path, title, album, artist, genre, track, date) values'
        for file in files:
            # decode/encode round-trip is a no-op for valid UTF-8 input.
            info = cp_get_metadata_py(file.decode('utf-8').encode('utf-8'))
            file_name = file.rsplit('/')[-1]  # basename of the path
            info['name'] = file_name
            info['path'] = file
            sql += '("{name}", "{path}", "{title}", "{album}", "{artist}", "{genre}", "{track}", "{date}"),'.format(**info)
        sql = sql[:-1]  # drop the trailing comma
        self.db.execute(sql)

    def del_files(self, ids):
        """
        require:
            ids: list, tuple, set
        """
        if not ids:
            return
        query = str(tuple(ids))
        # A 1-tuple renders as "(1,)"; strip the trailing comma for SQL.
        if len(ids)<2:
            query = query.replace(',', '')
        sql = 'delete from songs where id in {0}'.format(query)
        self.db.execute(sql)
def load(self, project): print 'data load...' if self.offline: d = self.gen_samples_offline( nsamples=self.n_train_samples, purpose='train', patchSize=self.project.patchSize, mean=self.project.mean, std=self.project.std) self.x = d[0] self.y = d[1] d = self.gen_samples_offline( nsamples=self.n_valid_samples, purpose='validate', patchSize=self.project.patchSize, mean=d[2], std=d[3]) self.x_valid = d[0] self.y_valid = d[1] print 'x:', np.shape(self.x) print 'y:', np.shape(self.y) print 'xvalid:', np.shape(self.x_valid) print 'yvalid:', np.shape(self.y_valid) else: self.load_validation() self.load_training() DB.finishLoadingTrainingset( project.id )
def configure():
    """Initialise logging from the JSON config file, wire up the SMTP error
    handler, validate runtime configuration, and install a global excepthook.

    Relies on module globals: logging_config_file, restart, test_cookie,
    config_smtp_handler, validate_config, validate_cookie, DB.
    """
    if os.path.isfile(logging_config_file):
        with open(logging_config_file, 'rt') as f:
            config = json.load(f)
        logging.config.dictConfig(config)
    logger = logging.getLogger(__name__)
    # Quieten chatty HTTP libraries.
    logging.getLogger("urllib3").setLevel(logging.WARNING)
    logging.getLogger("requests").setLevel(logging.WARNING)
    # Publish the logger as a module attribute so siblings can use it.
    sys.modules[__name__].__dict__['logger'] = logger
    # assumes handler index 2 of the root logger is the SMTP handler as laid
    # out in the logging config file -- TODO confirm against that config.
    smtp_handler = logging.getLogger().handlers[2]
    assert isinstance(smtp_handler, logging.handlers.SMTPHandler)
    config_smtp_handler(smtp_handler)
    if restart:
        # Fresh run: drop previously collected QA data.
        DB.drop_qa_collections()
    validate_config()
    if not validate_cookie(test_cookie):
        logger.error("invalid cookie")

    def handle_exception(exc_type, exc_value, exc_traceback):
        # Let Ctrl-C terminate normally instead of being logged as a crash.
        if issubclass(exc_type, KeyboardInterrupt):
            sys.__excepthook__(exc_type, exc_value, exc_traceback)
            return
        logger.critical("Uncaught exception",
                        exc_info=(exc_type, exc_value, exc_traceback))

    sys.excepthook = handle_exception
def process_projects(self, process_num, proj_paths, global_queue):
    """Tokenize every project in proj_paths, report the processed file
    count to the parent via global_queue, and exit the worker process."""
    db = DB(self.DB_user, self.DB_name, self.DB_pass, self.process_logging)
    try:
        tokens_path = os.path.join(
            self.output_folder, 'files-tokens-' + str(process_num) + '.tokens')
        self.filecount = 0
        with open(tokens_path, 'a+') as tokens_file:
            started = dt.datetime.now()
            for project_path in proj_paths:
                self.process_one_project(process_num, project_path, tokens_file, db)
            elapsed = (dt.datetime.now() - started).seconds
            self.process_logging.info('Process %s finished. %s files in %ss.',
                                      process_num, self.filecount, elapsed)
            # Let parent know
            global_queue.put((process_num, self.filecount))
            sys.exit(0)
    except Exception as e:
        self.process_logging.error('Error in process ' + str(process_num))
        self.process_logging.error(e)
        sys.exit(1)
    finally:
        db.close()
def save_stats(self): n_data = len(self.p) n_good = len( np.where( self.p == 0 )[0] ) self.accuracy = float(n_good)/n_data print '------data.save_stats-----' print 'accuracy:', self.accuracy Utility.report_status('.', '.') for entry in self.entries: i = np.arange( entry.offset, entry.offset+entry.length ) #y = self.y[ i ] p = self.p[ i ] n_data = len(p) n_good = len( np.where( p == 0 )[0] ) score = 0.0 if n_good == 0 else float(n_good)/n_data #print np.bincount( self.p ), np.bincount( p ), n_good #print len(p), '/', len(self.p) DB.storeTrainingScore( self.project.id, entry.name, score ) Utility.report_status('%s'%(entry.name), '%.2f'%(score)) #print 'image (%s)(%.2f)'%(entry.name, score) Utility.report_status('.', '.')
def newTimeSlice(self):
    """Insert a new time-slice row stamped with the current date and time,
    and return its id read back from the sequence."""
    database = DB()
    insert_sql = ('INSERT INTO "tbl_TimeSlice" ("timeSliceDateFrom", '
                  '"timeSliceTimeFrom") VALUES (current_date, current_time);')
    database.doDB(insert_sql)
    select_sql = 'SELECT last_value FROM "tbl_Timeslice_idTimeslice_seq";'
    return database.readDB(select_sql)[0][0]
def channelRank(self, idTimeSlice='SELECT last_value FROM "tbl_Timeslice_idTimeslice_seq"'):
    """Print and return the top-10 channels by new tweet count for the
    previous time slice.

    :param idTimeSlice: SQL subquery text (or a literal id) interpolated
        into the main query.  NOTE(review): interpolating SQL text passed
        as an argument is fragile and injection-prone -- consider passing
        a numeric id and parameterizing the query instead.
    :return: list of rows (channelNewTwits, idChannel, channelName,
        channelNumber, channelImage).
    """
    db=DB()
    query='SELECT count(*) "channelNewTwits", t."idChannel", c."channelName", c."channelNumber", c."channelImage" \
    FROM "tbl_Twit" t JOIN "tbl_Channel" c \
    ON t."idChannel" = c."idChannel" \
    JOIN "tbl_ChannelHashtag" ch \
    ON c."idChannel" = ch."idChannel" \
    AND ch."channelHastagEnabled"=TRUE \
    AND t."idTimeslice" = (%s)-1 \
    GROUP BY t."idChannel", c."channelName", c."channelNumber", c."channelImage" \
    ORDER BY "channelNewTwits" desc \
    LIMIT 10;' % (idTimeSlice)
    rank=db.readDB(query)
    i=0
    print '\n\n*********************'
    print '* Trending Channels *'
    print '*********************\n\n'
    while i<len(rank):
        # Unpack the row for the console listing below.
        twits=rank[i][0]
        idChannel=rank[i][1]
        channelName=rank[i][2]
        channelNumber=rank[i][3]
        print '%d) %s %s' % (i+1, channelName, channelNumber)
        i+=1
    print '\n\n'
    return rank
class DBLPHandler(xml.sax.ContentHandler): def __init__(self): # super(DBLPHandler, self).__init__() xml.sax.ContentHandler.__init__(self) self.PUB_TYPES = ["article", "inproceedings", "proceedings", "book", "incollection", "phdthesis", "mastersthesis", "www"] self.FIELDS = ["author", "editor", "title", "booktitle", "pages", "year", "address", "journal", "volume", "number", "month", "url", "ee", "cdrom", "cite", "publisher", "note", "crossref", "isbn", "series", "school", "chapter"] self.publication = None self.content = '' self.db = DB() self.count = 0 def startElement(self, name, attrs): if name in self.PUB_TYPES: print self.count self.count += 1 key = attrs.getValue("key") self.publication = Publication(name, key) self.content = '' if name in self.FIELDS: self.content = '' def endElement(self, name): if name in self.PUB_TYPES: self.db.dumps(self.publication) if name in self.FIELDS: self.publication.add_field(name, self.content) def characters(self, content): self.content += content.encode('utf-8').replace('\\','\\\\')
def hashtags(self): db=DB() #Query definition query = 'SELECT c."idChannel", c."channelNumber", c."channelName", ch."channelHashtagText" \ FROM "tbl_Channel" c JOIN "tbl_ChannelHashtag" ch \ ON c."idChannel"=ch."idChannel";' registros=db.readDB(query) #Local Array hashTags=[] #Reorganize DB response i=0 add=1 while i<len(registros): idChannel=registros[i][0] channelNumber=registros[i][1] channelName=registros[i][2] ht=registros[i][3] if registros[i][0]==registros[i-1][0] and i!=0: row.append(registros[i][3]) add=0 else: row = [i, idChannel, channelNumber, channelName, ht] add=1 if add: hashTags.append(row) i+=1 return hashTags
def add_input(self, inp):
    """ since inputs are really just outputs, this will be slowly converted
    to just accepting an input value and it finds previous output values
    from past transactions.

    :param inp: an input whose .value is the amount to cover.
    :return: self (fluent).
    :raises Exception: when the unspent outputs cannot cover the value.
    """
    target_val = inp.value

    # find all previous unspent outputs....
    from db import DB
    db = DB()
    outputs = db.getUnspentOutputs(self.owner.publickey())
    if len(outputs) > 2:
        # Merge fragmented outputs first so fewer inputs are needed below.
        self.consolidateOutputs(outputs)
        outputs = db.getUnspentOutputs(self.owner.publickey())

    # find enough outputs to total the requested input...
    val = 0
    for o in outputs:
        val += o.value
        inp = Transaction.Input(o.value, o.transaction, o.n,
                                owner=self.owner, output=o)
        self.input.append(inp)
        # Fixed: stop as soon as the target is covered; the old '>' kept
        # consuming an extra output when val landed exactly on target_val.
        if val >= target_val:
            break

    # compute change and create a new output to ourselves.
    diff = val - target_val
    if diff < 0:
        raise Exception('Output exceeds input!')

    # 'manually' add the change as an output back to ourselves
    o = Transaction.Output(diff, self.owner.publickey())
    self.output.append(o)
    o.n = len(self.output)
    return self
class AddEvent(tornado.web.RequestHandler):
    """Creates or updates a (token, platform) alert record."""

    def initialize(self):
        self.webdatabase = DB()

    def get(self):
        token = self.get_argument('token', None)
        platform = self.get_argument('platform', None)
        low = self.get_argument('low', None)
        high = self.get_argument('high', None)
        language = self.get_argument('language', '')
        if self.webdatabase.exists(token, platform):
            # Existing record: update only the fields that were supplied.
            changes = {}
            if low:
                changes['low'] = float(low)
            if high:
                changes['high'] = float(high)
            if language:
                changes['language'] = language
            try:
                self.webdatabase.update(token, platform, changes)
                self.write('success')
            except Exception as e:
                self.write(str(e))
        else:
            # New record: missing thresholds default to -1.0 sentinels.
            low = low if low else -1.0
            high = high if high else -1.0
            try:
                self.webdatabase.insert(token, platform, low, high, language)
                self.write('success')
            except Exception as e:
                self.write(str(e))
def edit_congregation(self, row):
    """
    Prepares user entered data for the selected congregation before
    sending it to the db module for updating it in the database.

    Checks conducted: Check for required fields the user may have left
    blank.

    :param row: The id within the table Congregation being edited.
    """
    # REVIEW long and lat: Leading zeros may be removed.
    values = (self.name, self.phone, self.email, self.street, self.city,
              self.state, self.zip, self.week, self.time, self.long,
              self.lat, self.note, self.visibility)

    missing_fields = Congregation.__check_required_fields(self)
    if missing_fields != "Passed":
        print("A required field was missing: {}".format(missing_fields[1]))
    else:
        DB.modify_item(None, 'Congregation', Congregation.columns, values, row)
def add_congregation(self):
    """
    Prepares user entered data for a new congregation before sending it
    to the db module for insertion into the database.
    """
    # REVIEW long and lat: Leading zeros may be removed.
    values = (self.name, self.phone, self.email, self.street, self.city,
              self.state, self.zip, self.week, self.time, self.long,
              self.lat, self.note, self.visibility)

    dup_congregation = Congregation.__check_for_dup(self, values[0])
    missing_fields = Congregation.__check_required_fields(self)

    if dup_congregation != "Passed":
        print("A duplicate entry was found: {}".format(
            dup_congregation[1]))
    elif missing_fields != "Passed":
        print("A required field was missing: {}".format(
            missing_fields[1]))
    else:
        DB.add_item(None, 'Congregation', Congregation.columns, values)
def edit_brother(self, row):
    """
    Prepares user entered data for the selected brother before sending it
    to the db module for updating it in the database.

    Checks conducted: Check for required fields the user may have left
    blank.

    :param row: The id within the table Brother being edited.
    """
    # Consistency: values as a tuple, matching the sibling add/edit methods.
    values = (self.first_name, self.middle_name, self.last_name, self.email,
              self.phone, self.congregation, self.responsibility,
              self.speaker, self.chairman, self.coordinator, self.note,
              self.visibility)

    # Check for missing fields
    missing_fields = Brother.__check_required_fields(self)
    if missing_fields == "Passed":
        DB.modify_item(None, 'Brother', Brother.columns, values, row)
    else:
        # Fixed: print the message element (index 1), as the sibling
        # congregation methods do, instead of the raw result object.
        print("A required field was missing: {}".format(missing_fields[1]))
    # TODO Add a check for duplicates
def add_brother(self):
    """
    Adds a new brother to the database.

    :return: None
    """
    values = (self.first_name, self.middle_name, self.last_name, self.email,
              self.phone, self.congregation, self.responsibility,
              self.speaker, self.chairman, self.coordinator, self.note,
              self.visibility)

    missing_fields = Brother.__check_required_fields(self)
    if missing_fields != "Passed":
        print("The following are missing: ", missing_fields)
    else:
        DB.add_item(None, 'Brother', Brother.columns, values)
def animate_flat_plot(f, step=10, limit=None, figsize=None):
    """Build an animation of flat plots over progressively more roots.

    :param f: a single item or list of items accepted by flat_plot/load_roots.
    :param step: number of roots added per frame.
    :param limit: cap on the number of roots animated; defaults to the
        largest root set found across f.
    :param figsize: figure size; defaults to (15, len(f)) when omitted.
    """
    db = DB()
    if not isinstance(f, list):
        f = [f, ]

    # Figure out how many frames we will have
    # ...not very efficient
    # (renamed from 'max', which shadowed the builtin)
    largest = 1
    for item in f:
        n = len(db.load_roots(item, raw=False))
        if largest < n:
            largest = n
    # Cap the limit at the available roots; no/zero limit means "all".
    if not limit or limit > largest:
        limit = largest
    frames = ceil(limit / step)

    plots = [flat_plot(f, (i + 1) * step, False) for i in range(frames)]

    # Fixed: only apply the default when the caller did not pass figsize
    # (previously the argument was always overwritten).
    if figsize is None:
        figsize = (15, len(f))
    return animate(plots, figsize=figsize)
def post(self):
    """Dispatch project start/stop requests encoded in the request URI
    as '<base>.<action>.<project>'."""
    print ('-->BrowseHandler.post...', self.request.uri)
    parts = self.request.uri.split(".")
    if len(parts) > 2:
        action, project = parts[1], parts[2]
        if action == 'stop':
            DB.stopProject(project)
        elif action == 'start':
            DB.startProject(project)
def search(self, search, limit=1000, excludedcats=None):
    """Search binaries by name, optionally excluding categories.

    A leading '^' on the first word requires names to START with that word;
    every other word is matched anywhere in the name.

    :param search: space-separated search terms.
    :param limit: maximum number of rows returned.
    :param excludedcats: iterable of category IDs to exclude (was a mutable
        default dict; None now stands in for "no exclusions").
    :return: result rows from the binaries query.
    """
    if excludedcats is None:
        excludedcats = {}
    mdb = DB()

    # if the query starts with a ^ it indicates the search is looking for
    # items which start with the term; still do the like match, but mandate
    # that all items returned must start with the provided word
    words = search.split(' ')
    searchsql = ''
    for intwordcount, word in enumerate(words):
        # see if the first word has a caret, which indicates search must
        # start with term
        if intwordcount == 0 and word and word[0] == '^':
            # Fixed: the old "like %s%" format string raised ValueError
            # (incomplete format); build the prefix pattern, then escape.
            searchsql += ' and b.name like %s' % (mdb.escapeString(word[1:] + '%'))
        else:
            searchsql += ' and b.name like %s' % (mdb.escapeString('%' + word + '%'))

    exccatlist = ''
    if len(excludedcats) > 0:
        # str() each id so numeric category ids also join cleanly.
        exccatlist = 'and b.categoryID not in (' + ','.join(str(c) for c in excludedcats) + ') '

    res = mdb.query('''SELECT b.*, g.name AS group_name, r.guid, (SELECT COUNT(ID) FROM parts p where p.binaryID = b.ID) as 'binnum' FROM binaries b INNER JOIN groups g ON g.ID = b.groupID LEFT OUTER JOIN releases r ON r.ID = b.releaseID WHERE 1=1 %s %s order by DATE DESC LIMIT %d ''', (searchsql, exccatlist, limit))
    return res
def animate_density_plot(f, frames, precision=50, **kw):
    """Animate density plots over progressively larger prefixes of f's data.

    The y-axis default is the maximum ymax seen across the partial frames;
    xmin/xmax/ymin/ymax may be overridden through keyword arguments.
    """
    db = DB()
    total = db.count(f)
    step = int(total // frames)

    plots = list()
    peak = 0
    for frame in range(frames - 1):
        plots.append(density(f, precision, stop=(frame + 1) * step))
        frame_ymax = plots[-1].get_minmax_data()['ymax']
        if peak < frame_ymax:
            peak = frame_ymax
    # Final frame covers the full data set.
    plots.append(density(f, precision))

    # Plot options
    plot_opts = {
        'xmin': kw.get('xmin'),
        'xmax': kw.get('xmax'),
        'ymin': kw.get('ymin', 0),
        'ymax': kw.get('ymax', peak),
    }
    return animate(plots, **plot_opts)
class Main(object):
    """Crawl info for student ids 'U20121' + 0001..7999 and store parsed hits."""

    def __init__(self):
        self.mainspider = HustSpider()
        self.db = DB()

    def do(self, id):
        """Fetch and parse one student id; insert into the DB when a name
        is parsed.  On a failed fetch, the spider is re-created (its
        session may have gone stale) and the id is skipped.
        """
        res = self.mainspider.get_info(id)
        if not res:
            self.mainspider = HustSpider()
            return
        res = HustParser(res)
        if res.name:
            self.db.insert_info(res)
        return

    def run(self):
        """Iterate all candidate ids, logging and skipping failures."""
        for i in range(1, 8000):
            # Idiom fix: zero-pad to 4 digits instead of a manual while loop.
            id = 'U20121' + str(i).zfill(4)
            try:
                self.do(id)
            except Exception:
                # Narrowed from a bare except so Ctrl-C still interrupts.
                print(("#error:%s" % id))
                continue
def getuuid(self, projectId, imageId, guid):
    """Acquire (or refresh) the annotation lock on an image and return the
    lock uuid as compressed JSON.

    :param projectId: project identifier.
    :param imageId: image identifier within the project.
    :param guid: the caller's current lock guid, if any.
    :return: compressed JSON; contains 'uuid' when the lock was obtained.
    """
    data = {}
    project = DB.getProject( projectId )
    task = DB.getImage( projectId, imageId )
    # A lock is considered stale after four sync periods.
    expiration = project.syncTime*4
    if task.annotationLockId == guid:
        # Caller already holds the lock -- refresh it.
        data['uuid'] = DB.lockImage( projectId, imageId )
        now = datetime.now()
        annotationTime = datetime.strptime(task.annotationTime, '%Y-%m-%d %H:%M:%S')
        diff = now - annotationTime
        # diff is computed only for the debug output below.
        print 'diff: ', diff.total_seconds()
    elif task.annotationStatus == 1:
        # Someone else holds an active lock -- only steal it once expired.
        now = datetime.now()
        annotationTime = datetime.strptime(task.annotationTime, '%Y-%m-%d %H:%M:%S')
        diff = now - annotationTime
        diff = diff.total_seconds()
        print 'time diff:', diff
        if diff > expiration:
            data['uuid'] = DB.lockImage( projectId, imageId )
        # NOTE(review): when the existing lock has not expired, no 'uuid'
        # key is returned -- confirm callers treat that as "image locked".
    else:
        # Image is unlocked -- take the lock.
        data['uuid'] = DB.lockImage( projectId, imageId )
    return Utility.compress(json.dumps( data ))
def __init__(self, pollId=None):
    """Bind to the shared 'autoid' collection, or to a poll-specific
    'autoid_<pollId>' collection when a poll id is given."""
    DB.__init__(self)
    if pollId:
        self.table = self.database['autoid_%s' % pollId]
    else:
        self.table = self.database['autoid']
class XPriceHistory(XBase):
    """Builds bucketed price-performance features over configured look-back
    intervals.

    Each non-empty line of self.cfg is '<days>=<v1,v2,...>': a look-back
    interval (days) plus the bucket boundaries used to discretise the
    performance over that interval.
    """
    def __init__(self):
        super(XPriceHistory, self).__init__()
        self.db=DB()
        self.days=[]    # look-back intervals (timedelta), one per cfg line
        self.ranges =[] # bucket boundaries per interval
        for i in self.cfg.readlines():
            i = i.rstrip()
            if i:
                print "%s"%i
                (day,value) = i.split("=")
                self.days.append(timedelta(days=int(day)))
                self.ranges.append([int(x) for x in value.split(',')])
    def filter(self,strs):
        # Hook for subclasses; accept everything by default.
        return True
    def format(self,str_list):
        """
        str_list symbol date ....
        format return list[x1,x2,x3...]

        Returns None when data is missing or the instrument has not traded
        recently enough for a reliable comparison.
        """
        rlist=[]
        if(not self.filter(str_list)):
            return None
        symbol=str_list[0]
        date = str_list[1]
        current = self.db.get_pre(symbol,date)
        if None == current:
            logging.error("No current data symbol %s date %s"%(symbol,date))
            return None
        i = 0
        for days_interval in self.days:
            pre_day = date - days_interval
            pre_one = self.db.get_pre(symbol, pre_day)
            if None == pre_one:
                logging.error("No next one data symbol %s date %s, pre_day %s"%(symbol,date,pre_day))
                return None
            # long time no trade
            # (tolerate gaps up to 2x the interval, minimum 3 days)
            if(date - pre_one[1] > (days_interval*2 if days_interval > timedelta(days=2) else timedelta(days=3))):
                logging.error("Too long before trade symbol %s date %s, pre_day %s\n current %s \n pre one%s " % (symbol,date,pre_day,current,pre_one))
                return None
            perfor = self.get_performance(pre_one,current,self.ranges[i])
            rlist.append(perfor)
            i = i+1
        return rlist
    def get_performance(self, pre, next_one,criteria):
        # Percentage change of column 7, bucketed by the criteria boundaries.
        # NOTE(review): '//' floors the percentage -- confirm the integer
        # result is intended rather than true division.
        a = 100*(next_one[7] - pre[7])//pre[7]
        for i in range(0,len(criteria)):
            if a < criteria[i]:
                return i
        return len(criteria)
def valveJob(setting): #(valve, onDuration)
    """Run one irrigation job: select the valve, run the pump for the
    configured duration, then shut everything off and log the run.

    Relies on module globals: tft (display), valves (shift register),
    Relay, DB.

    :param setting: dict with at least 'id', 'valve' and 'on_duration'.
    """
    print 'OPENING VALVE'
    tft.markActiveJob(setting['id'], True);
    # +2 covers the pump spin-up second plus the final countdown tick.
    durationLeft = int(setting['on_duration']) + 2
    #binaryValveList = map(int, list(format(setting['valve'], '08b')))
    #print binaryValveList
    pump = Relay()
    pump.on()
    time.sleep(1)
    #valves = Shiftregister()
    #shiftreg.outputList(binaryValveList)
    valves.outputDecimal(setting['valve'])
    #valves.enable()
    # Countdown loop: one tick per second until the duration is used up.
    while durationLeft > 2:
        time.sleep(1)
        durationLeft -= 1
        print 'TIME LEFT: %i' % (durationLeft - 1)
    print 'CLOSING VALVE'
    pump.off()
    print 'reset shift register 1'
    #valves.disable()
    valves.reset()
    time.sleep(1)
    #valves.reset()
    tft.markActiveJob(setting['id'], False);
    # Persist the completed run.
    db = DB()
    db.addLogLine(setting, datetime.now())
    return
class ChromaTest(unittest.TestCase):
    """Shared fixture: in-memory DB, regions from TEST_LANDS, two users."""

    def setUp(self):
        logging.basicConfig(level=logging.DEBUG)
        conf = MockConf(dbstring="sqlite://")
        self.db = DB(conf)
        self.db.create_all()
        self.sess = self.db.session()
        self.sess.add_all(Region.create_from_json(TEST_LANDS))
        self.sess.commit()

        # Create some users
        self.alice = self.create_user("alice", 0)
        self.bob = self.create_user("bob", 1)

    def create_user(self, name, team):
        """Create a leader with 100 loyalists, placed at the team capital."""
        user = User(name=name, team=team, loyalists=100, leader=True)
        self.sess.add(user)
        user.region = Region.capital_for(team, self.sess)
        self.sess.commit()
        return user

    def get_region(self, name):
        """Look up a region by (lower-cased) name."""
        return self.sess.query(Region).filter_by(name=name.lower()).first()
def __init__(self, file_list_projects): self.target_folders = str(time.time()) # Creating folder for the processes logs self.logs_folder = os.path.join(self.PATH_logs,self.target_folders) if os.path.exists( self.logs_folder ): logging.error('Folder [%s] already exists!' % self.logs_folder ) sys.exit(1) else: os.makedirs(self.logs_folder) # Create folder for processes output self.output_folder = os.path.join(self.PATH_output,self.target_folders) if os.path.exists( self.output_folder ): logging.error('Folder [%s] already exists!' % self.output_folder ) sys.exit(1) else: os.makedirs(self.output_folder) # Logging code FORMAT = '[%(levelname)s] (%(asctime)-15s) %(message)s' logging.basicConfig(level=logging.DEBUG,format=FORMAT) file_handler = logging.FileHandler( os.path.join(self.logs_folder,'tokenizer.log') ) file_handler.setFormatter(logging.Formatter(FORMAT)) logging.getLogger().addHandler(file_handler) self.read_config() db = DB('pribeiro','CPP','pass',logging) logging.info('Database \''+self.DB_name+'\' successfully initialized') db.close() self.proj_paths = self.read_file_paths(file_list_projects)
def populate_list(self):
    """Populates the talk_list widget with the outlines

    Format of outline_list: [(DB ID, number, title, visibility), ...]
    """
    db = DB()
    self.table_outline.clearContents()

    sql_number_sort = "SELECT * FROM Talk WHERE visibility='True' ORDER " \
                      "BY CAST (number AS INTEGER)"
    sql_title_sort = "SELECT * FROM Talk WHERE visibility='True' ORDER BY" \
                     " title ASC"
    # Sort order follows the radio-button selection.
    sql = sql_number_sort if self.radio_number.isChecked() else sql_title_sort
    outline_list = DB.return_sql(None, sql)

    self.table_outline.setColumnCount(2)
    self.table_outline.setRowCount(db.count_rows('Talk', True))

    self.sorted_list = []  # Table IDs of items added sorted to the table
    for index, item in enumerate(outline_list):
        self.table_outline.setItem(index, 0, QtGui.QTableWidgetItem(item[1]))
        self.table_outline.setItem(index, 1, QtGui.QTableWidgetItem(item[2]))
        self.sorted_list.append(item[0])
def categoryDB(keywords):
    """For each keyword, fetch the next upcoming program whose category is
    linked to that keyword.

    NOTE(review): credentials and host are hard-coded; the keyword is
    spliced directly into the SQL string (injection-prone); and the bare
    except silently returns whatever was collected so far.

    :param keywords: iterable of keyword names.
    :return: list with one list of program dicts per keyword.
    """
    print '%s\n\n\n' % keywords
    db = DB(host='190.215.44.18', port='5432', dbname='GLF', user='******', password='******')
    response = []
    for keyword in keywords:
        print keyword
        query = 'SELECT c."channelNumber", c."channelName", s."startDate", s."endDate", p."title", p."description", pc."mscName" \
        FROM "tbl_Channel" c, "tbl_Schedule" s, "tbl_Program" p, "tbl_ProgramCategory" pc, "tbl_KeywordCategory" kc, "tbl_Keyword" k \
        WHERE (c."idChannel"=s."idChannel" AND s."idProgram"=p."idProgram" AND p."idCategory"=pc."idCategory" AND pc."idCategory"=kc."idCategory" AND k."idKeyword"=kc."idKeyword" AND s."startDate">current_timestamp AND k."keywordName"=\''+keyword+'\') \
        ORDER BY "startDate" LIMIT 1;'
        #query = 'SELECT "categoryName" FROM "tbl_ProgramCategory" pc, "tbl_KeywordCategory" kc, "tbl_Keyword" k WHERE (pc."idCategory"=kc."idCategory" AND k."idKeyword"=kc."idKeyword" AND k."keywordName"=\''+keyword+'\');'
        try:
            programs = db.readDB(query)
        except:
            # On any DB error, bail out with the partial response.
            return response
        i=0
        progArr = []
        while i<len(programs):
            # Unpack the row into a JSON-friendly dict.
            channelNumber=programs[i][0]
            channelName=programs[i][1]
            startDate=str(programs[i][2])
            endDate=str(programs[i][3])
            title=programs[i][4]
            description=programs[i][5]
            mscName=programs[i][6]
            progArr.append({'channelNumber':channelNumber, 'channelName':channelName, 'startDate':startDate, 'endDate':endDate, 'title':title, 'description':description, 'mscName':mscName})
            i+=1
        response.append(progArr)
    return response
file_handler = logging.FileHandler(log_path) file_handler.setFormatter(logging.Formatter(FORMAT)) logging.getLogger().addHandler(file_handler) if len(sys.argv) == 1: logging.error('ERROR. At least 1 argument is required') sys.exit(1) if len(sys.argv) >= 2: DB_name = sys.argv[1] if len(sys.argv) >= 3: output_path = sys.argv[2] if len(sys.argv) >= 4: pairs_path = sys.argv[3] try: db_object = DB(user, DB_name, passw, logging) logging.info('Starting DB: ' + DB_name + ' with ' + user + ':' + passw) if len(sys.argv) >= 2: logging.info('### Creating Tables') db_object = DB(user, DB_name, passw, logging) if len(sys.argv) >= 3: logging.info('### Importing output from tokenizer') import_tokenizer_output(db_object, output_path, logging) if len(sys.argv) >= 4: logging.info('### Importing output from tokenizer') #import_pairs(pairs_path) db_object.close()
# Ad-hoc manual test script for the walgreens DB wrapper; the calls it
# exercised are kept below as commented-out examples.
from db import DB
from datetime import datetime
from decimal import Decimal

# NOTE(review): hard-coded local credentials ('walgreens'/'root'/'root').
db = DB('walgreens', 'root', 'root')

# result = db.proc('adjustment', (1, 100.00, 1, 'tp_trans', '--'))
# print result
# print result[4]

# data = {
#     'trans_type': '520',
#     'account_nbr': '1234567890123456',
#     'trans_date': datetime.now(),
#     'in_ts': datetime.now(),
#     'merch_name': 'fake_name',
#     'merch_city': 'fake city',
#     'merch_state': 'UT',
#     'acq_id': '12345',
#     'amt': Decimal(int('12345') / 100.0),
#     'status': 'N'
# }
# id = db.insert('tp_trans', data)
# print id
attachment_image_url="", attachment_text=""): if response is not None: slack_client.api_call("chat.postMessage", channel=channel, text=response, attachments=[{ "fallback": attachment_text, "image_url": attachment_image_url }], as_user=(False if not response else True)) if __name__ == "__main__": READ_WEBSOCKET_DELAY = 0.5 # 0.5 second delay between reading from firehose swr_db = DB("swear_db.txt") usr_db = DB("user_db.txt") commands = [ Command( "add", "Commands.add(text, user, usernames[user], swr_db)", 'Add a word to the swear database. Usage: `@bot add "wordtoadd" [0..3]`', '{0} add ".{{1,64}}" -?\d{{1,}}|{0} add ".{{1,64}}"'.format( Config.AT_BOT)), Command("highscore", "Commands.highscore(usernames, usr_db)", "List users' profanity."), Command("swears", "Commands.swears(swr_db)", "List all monitored swearwords."), Command("help", "Commands.help(keywords)", "Show this help message."), Command("", "'', 'Deal with it!', 'http://i.imgur.com/9PO2N1V.jpg'", "", ":deal_robi:") ]
#option = int(input("")) option = 1 a = a.A(path + "." + crawlers[option - 1]) next_page = True number = 0 counter = 91 while next_page is not None and number <= 2500: s = DB.select() print("[+] Gathering links...") links, next_page = a.get_download_links(counter + 2) print("Next page: " + str(next_page)) if links is not None: print("[+] Downloading list of programs...") for link in links: flag = True
action="store_false", dest="addtrees", help="clean %s" % config["juno-base"]) ap.add_argument("-w", dest="workers", default=4, type=int, help="number of workers (default 4)") return vars(ap.parse_args()) if __name__ == "__main__": config = common.load_config() args = parse_args() common.mkdirs(config["juno-base"]) os.chdir(config["juno-base"]) lock = common.Lock(".lock") os.nice(10) db = DB(".db") check_fs(db) check_db(db) db.close() if args["addtrees"]: add_trees()
def foo():
    """Smoke-test driver exercising the DB import / categorize / tree /
    transform API against a sample ipko bank export.  Prints db.msg and the
    affected state after most calls; intended for manual inspection, not
    automated assertions.
    """
    fs = FileSystem()
    db = DB()
    path = './testing/history_20201123_133040.xls'
    bank = 'ipko'
    # dane.imp_data('./testing/Zestawienie operacji.xlsx', 'raifeisen')
    # dane.imp_data('./testing/Zestawienie operacji (1).xlsx', 'raifeisen_kredyt')
    fs.setIMP(path)
    read_SQL = 0  # toggle: 1 reopens a stored DB instead of importing
    if read_SQL:
        db.open_db(fs.getDB())
        print(db.msg)
    else:
        db.imp_data(fs.getIMP(), bank)
        print(db.msg)
        db.imp_commit('ok')
        print(db.msg)
    print(db.cat.opers())
    # Categorize ATM withdrawals.
    fltr = {
        db.COL_NAME: 'typ_transakcji',
        db.SEL: 'txt_match',
        db.FILTER: 'Wypłata z bankomatu',
        db.OPER: 'add',
        db.CATEGORY: 'bankomat'
    }
    db.cat.add(fltr=fltr)
    print(db.msg)
    print(db.op.get('bankomat'))
    print(db.cat.cat)
    print(db.tree.tree)
    print(db.op.sum_data('kwota', 'bankomat'))
    # Split a recurring -20 amount out of the 'bankomat' category.
    split = {
        db.START: '2020-08-28 00:00:00',
        db.END: '2020-11-20 00:00:00',
        db.COL_NAME: db.CATEGORY,
        db.FILTER: 'bankomat',
        db.VAL1: -20,
        db.DAYS: 3
    }
    db.split.add(split=split)
    print(db.op.sum_data('kwota', 'bankomat'))
    print(db.op.sum_data('kwota', 'split:bankomat'))
    # Single-filter category add.
    fltr = {
        db.COL_NAME: 'opis_transakcji',
        db.SEL: 'txt_match',
        db.FILTER: 'ITALKI',
        db.OPER: 'add',
        db.CATEGORY: 'italki'
    }
    db.cat.add(fltr)
    print(db.msg)
    print(db.op.get('italki'))
    print(db.cat.cat)
    print(db.tree.tree)
    fltr = {
        db.COL_NAME: 'lokalizacja',
        db.SEL: 'txt_match',
        db.FILTER: 'ITALKI',
        db.OPER: 'add',
        db.CATEGORY: 'nauka'
    }
    db.cat.add(fltr)
    print(db.msg)
    print(db.op.get('nauka'))
    print(db.cat.cat)
    print(db.tree.tree)
    # Remove a single filter operation, then a whole category.
    db.cat.rm(oper_n=1, category='nauka')
    print(db.msg)
    print(db.op.get('nauka'))
    print(db.cat.cat)
    print(db.tree.tree)
    db.cat.rm(category='italki')
    print(db.msg)
    print(db.op.get('italki'))
    print(db.cat.cat)
    print(db.tree.tree)
    # Multi-filter add with an intentional duplicate filter.
    fltr = [{
        db.COL_NAME: 'lokalizacja',
        db.SEL: 'txt_match',
        db.FILTER: 'PANEK',
        db.OPER: 'add',
        db.CATEGORY: 'panek'
    }]
    fltr.append({
        db.COL_NAME: 'lokalizacja',
        db.SEL: 'txt_match',
        db.FILTER: 'PANEK',
        db.OPER: 'add',
        db.CATEGORY: 'panek'
    })
    db.cat.add(fltr)
    print(db.msg)
    print(db.op.get('panek'))
    print(db.cat.cat)
    print(db.tree.tree)
    fltr = [{
        db.COL_NAME: 'opis_transakcji',
        db.SEL: 'txt_match',
        db.FILTER: 'ITALKI',
        db.OPER: 'add',
        db.CATEGORY: 'italki'
    }]
    fltr.append({
        db.COL_NAME: 'lokalizacja',
        db.SEL: 'txt_match',
        db.FILTER: 'ITALKI',
        db.OPER: 'add',
        db.CATEGORY: 'italki'
    })
    db.cat.add(fltr)
    print(db.msg)
    print(db.op.get('italki'))
    print(db.cat.cat)
    print(db.tree.tree)
    # Reorder filter operations within categories.
    db.cat.mov(oper_n=2, new_oper_n=1, category='panek')
    print(db.msg)
    print(db.op.get('panek'))
    print(db.cat.cat)
    print(db.tree.tree)
    db.cat.mov(oper_n=2, new_oper_n=1, category='italki')
    print(db.msg)
    print(db.op.get('italki'))
    print(db.cat.cat)
    print(db.tree.tree)
    # Rename categories ('nauka' no longer exists at this point).
    db.cat.ren(new_category='nauka2', category='nauka')
    print(db.msg)
    print(db.op.get('nauka'))
    print(db.op.get('nauka2'))
    print(db.cat.cat)
    print(db.tree.tree)
    db.cat.ren(new_category='italki2', category='italki')
    print(db.msg)
    print(db.op.get('italki'))
    print(db.op.get('italki2'))
    print(db.cat.cat)
    print(db.tree.tree)
    # Tree operations: add, rename, re-parent and remove nodes.
    db.tree.add(parent='panek', child='inny_panek')
    print(db.op.get('panek'))
    print(db.cat.cat)
    print(db.tree.tree)
    db.tree.ren(category='inny_panek', new_category='skasuj')
    print(db.msg)
    print(db.op.get('italki'))
    print(db.cat.cat)
    print(db.tree.tree)
    db.tree.ren(category='panek', new_category='skasuj')
    print(db.msg)
    print(db.op.get('italki'))
    print(db.cat.cat)
    print(db.tree.tree)
    db.tree.mov(new_parent='skasuj', child='italki2')
    print(db.msg)
    print(db.op.get('italki2'))
    print(db.cat.cat)
    print(db.tree.tree)
    db.tree.rm(child='skasuj')
    print(db.msg)
    print(db.op.get('italki2'))
    print(db.cat.cat)
    print(db.tree.tree)
    # Re-add the ATM category (it was affected by the split earlier).
    fltr = {
        db.COL_NAME: 'typ_transakcji',
        db.SEL: 'txt_match',
        db.FILTER: 'Wypłata z bankomatu',
        db.OPER: 'add',
        db.CATEGORY: 'bankomat'
    }
    db.cat.add(fltr=fltr)
    print(db.msg)
    print(db.op.get('bankomat'))
    print(db.cat.cat)
    print(db.tree.tree)
    print(db.trans.opers())
    # add + rem filter pair: include by sender, exclude large amounts.
    fltr = [{
        db.COL_NAME: 'nazwa_nadawcy',
        db.SEL: 'txt_match',
        db.FILTER: 'ALINA MAŁGORZATA OLENDER ZIELASKOWS KA',
        db.OPER: 'add',
        db.CATEGORY: 'zwrot'
    }]
    fltr.append({
        db.COL_NAME: 'kwota',
        db.SEL: 'greater >',
        db.FILTER: '1200',
        db.OPER: 'rem',
        db.CATEGORY: 'zwrot'
    })
    db.cat.add(fltr=fltr)
    print(db.msg)
    print(db.op.get('zwrot'))
    print(db.cat.cat)
    print(db.tree.tree)
    fltr = {
        db.COL_NAME: 'nazwa_nadawcy',
        db.SEL: 'txt_match',
        db.FILTER: 'ALINA MAŁGORZATA OLENDER ZIELASKOWS KA',
        db.OPER: 'add',
        db.CATEGORY: 'stypendium'
    }
    db.cat.add(fltr=fltr)
    print(db.msg)
    print(db.op.get('stypendium'))
    print(db.cat.cat)
    print(db.tree.tree)
    # Column transformations: sign flip plus text clean-up per bank.
    trans = [{
        'bank': 'bnp_kredyt',
        'col_name': 'kwota',
        'oper': '*',
        'val1': -1
    }]
    trans.append({
        'bank': 'ipko',
        'col_name': 'opis_transakcji',
        'oper': 'str.replace',
        'val1': 'Tytuł: ',
        'val2': ''
    })
    trans.append({
        'bank': 'ipko',
        'col_name': 'lokalizacja',
        'oper': 'str.replace',
        'val1': 'Lokalizacja: ',
        'val2': ''
    })
    trans.append({
        'bank': 'ipko',
        'col_name': 'lokalizacja',
        'oper': 'str.replace',
        'val1': 'Kraj: ',
        'val2': ''
    })
    trans.append({
        'bank': 'ipko',
        'col_name': 'lokalizacja',
        'oper': 'str.replace',
        'val1': ' Miasto:',
        'val2': ''
    })
    trans.append({
        'bank': 'ipko',
        'col_name': 'lokalizacja',
        'oper': 'str.replace',
        'val1': ' Adres:',
        'val2': ''
    })
    trans.append({
        'bank': '',
        'col_name': 'typ_transakcji',
        'oper': 'str.replace',
        'val1': 'Card transaction',
        'val2': 'Płatność kartą'
    })
    db.trans.add(trans)
    print(db.trans.trans)
    db.trans.rm(trans_n=2)
    print(db.trans.trans)
    db.trans.mv(trans_n=2, new_trans_n=3)
    print(db.trans.trans)
    # Persist the resulting database to disk.
    print(db.write_db(fs.getDB()))
def main():
    """Console UI loop for the phone-stock management system.

    Presents a numbered menu and dispatches on the user's choice:
    1 insert a record, 2 search by brand, 3 list all records,
    4 update a price by id, 5 delete by id, 6 quit.

    Fixes over the original:
    - ``eval(input(...))`` replaced with safe int/float parsing: eval on
      raw console input is a code-injection hole and raises NameError on
      non-numeric text.
    - the quantity check ``isinstance(count, int)`` could never be true
      (``input()`` always returns str), so menu option 1 always rejected
      the quantity; it is now parsed as an int like the other numeric
      fields.
    """

    def _read_int(prompt):
        # Parse an integer from the console; None signals invalid input.
        try:
            return int(input(prompt))
        except ValueError:
            return None

    def _read_float(prompt):
        # Parse a number (int or float accepted) from the console;
        # None signals invalid input.
        try:
            return float(input(prompt))
        except ValueError:
            return None

    def _invalid():
        # Shared invalid-number feedback used by every numeric prompt.
        print('输入错误,请输入数字')
        time.sleep(1)

    db = DB()
    while True:
        print('''
--------------------------------------------------
手机管理系统
1.手机录入
2.根据手机品牌查询手机信息
3.查询全部收集信息
4.根据手机编号修改手机价格
5.根据手机编号删除记录
6.退出
--------------------------------------------------
''')
        oper = input()
        if oper == '1':
            id = _read_int('请输入序号:')
            if id is None:
                _invalid()
                continue
            brand = input('请输入品牌:')
            model = input('请输入型号:')
            price = _read_float('请输入价格:')
            if price is None:
                _invalid()
                continue
            count = _read_int('请输入数量:')
            if count is None:
                _invalid()
                continue
            version = input('请输入版本:')
            info = Info(ID(id), Brand(brand), Model(model), Price(price),
                        Count(count), Version(version))
            db.insert(info)
            print('添加成功')
            db.display_all()
            time.sleep(5)
        elif oper == '2':
            brand_str = input('请输入手机品牌:')
            db.search_by_brand(brand_str)
            time.sleep(5)
        elif oper == '3':
            db.display_all()
            time.sleep(5)
        elif oper == '4':
            id = _read_int('请输入编号')
            if id is None:
                _invalid()
                continue
            price = _read_float('请输入修改的价格')
            if price is None:
                _invalid()
                continue
            db.update_by_id(id, price)
            print('修改成功!')
            db.display_all()
            time.sleep(5)
        elif oper == '5':
            id = _read_int('请输入编号')
            if id is None:
                _invalid()
                continue
            db.del_by_id(id)
            print('删除成功!')
            db.display_all()
            time.sleep(5)
        elif oper == '6':
            print('程序退出,谢谢您的使用!')
            break
        else:
            print('对不起,您的输入有误,请重新输入!')
            time.sleep(1)
class Auth:
    """Auth class to interact with the authentication database.

    Thin service layer over the DB gateway: user registration,
    credential checks, session lifecycle, and password-reset tokens.
    All persistence goes through ``self._db``; passwords are stored
    bcrypt-hashed via the module-level ``_hash_password`` helper.
    """

    def __init__(self):
        """Initialisation: create the DB gateway used by every method."""
        self._db = DB()

    def register_user(self, email: str, password: str) -> User:
        """
        register_user - registers user to database
        Args:
            - email: str, takes user email
            - password: str, takes user password
        Return:
            - User instance after registration
        Exception:
            - raises ValueError if user already exists
        """
        try:
            # If the lookup succeeds, the address is already taken.  The
            # ValueError raised here is NOT caught by the except clause
            # below (it only catches AttributeError/NoResultFound), so it
            # propagates to the caller as the "already exists" signal.
            self._db.find_user_by(email=email)
            raise ValueError(f"User {email} already exists")
        except (AttributeError, NoResultFound):
            # No such user yet: hash the password and persist the new row.
            password = _hash_password(password)
            return self._db.add_user(email, password)

    def valid_login(self, email: str, password: str) -> bool:
        """
        valid_login - check the validation of login credentials
        Args:
            - email: str, user email
            - password: str, user password
        Return:
            - True if the password matches the stored bcrypt hash,
              False otherwise (including unknown email or any lookup error)
        """
        try:
            user = self._db.find_user_by(email=email)
            if not user:
                return False
            password = password.encode('utf-8')
            validation = bcrypt.checkpw(password, user.hashed_password)
            if validation:
                return True
            return False
        except Exception as e:
            # NOTE(review): deliberately broad — any failure (unknown user,
            # malformed hash, DB error) is treated as an invalid login.
            return False

    def create_session(self, email: str) -> str:
        """
        create_session - creates new session and stores it in DB
        Args:
            - email: str, user email
        Return:
            - new session ID as string, or None if the email is unknown
        """
        try:
            user = self._db.find_user_by(email=email)
        except NoResultFound:
            return None
        session_id = _generate_uuid()
        # Persist the session id directly on the user row.
        user.session_id = session_id
        self._db._session.commit()
        return session_id

    def get_user_from_session_id(self, session_id: str) -> User:
        """
        get_user_from_session_id - gets user for requested session
        Args:
            - session_id: str, user session ID (may be None)
        Return:
            - User instance corresponding to session ID, or None when the
              session id is None or matches no user
        """
        if session_id is None:
            return None
        try:
            user = self._db.find_user_by(session_id=session_id)
            return user
        except NoResultFound:
            return None

    def destroy_session(self, user_id: int) -> None:
        """
        destroy_session - destroys a user session
        Args:
            - user_id: int, id corresponding to user
        Return:
            - None (also None when the user id is unknown)
        """
        try:
            user = self._db.find_user_by(id=user_id)
            # Clearing the stored session id invalidates the session.
            user.session_id = None
            self._db._session.commit()
        except NoResultFound:
            return None

    def get_reset_password_token(self, email: str) -> str:
        """
        get_reset_password_token - generate a reset password token
        Args:
            - email: str, user email
        Return:
            - reset_token as string
        Exception:
            - raises ValueError when the email matches no user
        """
        try:
            user = self._db.find_user_by(email=email)
            new_token = _generate_uuid()
            user.reset_token = new_token
            self._db._session.commit()
            return new_token
        except NoResultFound:
            raise ValueError

    def update_password(self, reset_token: str, password: str) -> None:
        """
        update_password - updates password using generated reset_token
        Args:
            - reset_token: str, generated uuid to verify user
            - password: str, user new password to update
        Return:
            - None
        Exception:
            - raises ValueError when the token is invalid or the update fails
        """
        try:
            user = self._db.find_user_by(reset_token=reset_token)
            password = _hash_password(password)
            user.hashed_password = password
            # One-shot token: consume it on successful update.
            user.reset_token = None
            self._db._session.commit()
            return None
        except Exception as e:
            # NOTE(review): broad catch — any failure surfaces as ValueError.
            raise ValueError
def main():
    """Entry point: open a database handle from the module-level config
    and run the page-extraction pipeline against it."""
    database = DB(config)
    extract_pages(database)
def __init__(self):
    """Initialisation: create the DB gateway this object delegates all
    persistence operations to."""
    self._db = DB()
from flask import Flask, session, redirect, render_template, flash, url_for from werkzeug.security import generate_password_hash, check_password_hash from models import UsersModel, CarsModel, DealersModel from forms import LoginForm, RegisterForm, AddCarForm, SearchPriceForm, SearchDealerForm, AddDealerForm from db import DB app = Flask(__name__) app.config['SECRET_KEY'] = 'yandexlyceum_secret_key' db = DB() UsersModel(db.get_connection()).init_table() CarsModel(db.get_connection()).init_table() DealersModel(db.get_connection()).init_table() @app.route('/') @app.route('/index') def index(): """ Главная страница :return: Основная страница сайта, либо редирект на авторизацю """ # если пользователь не авторизован, кидаем его на страницу входа if 'username' not in session: return redirect('/login') # если админ, то его на свою страницу if session['username'] == 'admin': return render_template('index_admin.html', username=session['username']) # если обычный пользователь, то его на свою cars = CarsModel(db.get_connection()).get_all()
def __init__(self, proxy_enable=False, proxy_max_num=setting.PROXY_MAX_NUM,
             timeout=setting.HTTP_TIMEOUT, cmd_args=None):
    """Configure the crawler for the Gansu provincial bidding site
    (甘肃省招标中心).

    Runtime configuration (redis/MySQL endpoints and table names) is
    pulled from a central redis instance at 192.168.1.34/1, then the
    MySQL handle used for the list queue and detail records is opened.

    Args:
        proxy_enable: forwarded to the spider base class.
        proxy_max_num: forwarded to the spider base class.
        timeout: HTTP timeout forwarded to the spider base class.
        cmd_args: command-line args forwarded to the spider base class.
    """
    spider.Spider.__init__(self, proxy_enable, proxy_max_num,
                           timeout=timeout, cmd_args=cmd_args)
    # Site display name.
    self.siteName = "甘肃省招标中心"
    # Category code: 01 news, 02 forum, 03 blog, 04 weibo, 05 print media,
    # 06 wechat, 07 video, 99 search engine.
    self.info_flag = "99"
    # Entry URL list.
    self.start_urls = ["http://ggzyjy.gansu.gov.cn"]
    self.encoding = 'gbk'
    self.site_domain = 'ggzyjy.gansu.gov.cn'
    self.dedup_uri = None
    self.headers = {
        # (several other headers — Accept, Cookie, Referer, Host, etc. —
        # were present but commented out in the original)
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36',
    }
    self.request_headers = {'headers': self.headers}
    # Central configuration store: endpoint/table settings live in redis.
    self.conn_config = redis.StrictRedis.from_url('redis://192.168.1.34/1')
    redis_ip = self.conn_config.get("redis_ip")
    redis_db = self.conn_config.get("redis_db")
    mysql_ip = self.conn_config.get("mysql_ip")
    mysql_databases = self.conn_config.get("mysql_databases")
    mysql_username = self.conn_config.get("mysql_username")
    mysql_password = self.conn_config.get("mysql_password")
    mysql_list_info = self.conn_config.get("mysql_table1")
    result1 = self.conn_config.get("mysql_list_model_filter")
    base2 = self.conn_config.get("mysql_detail_info")
    try:
        self.conn = redis.StrictRedis.from_url('redis://{0}/{1}'.format(
            redis_ip, redis_db))
    except:
        # NOTE(review): the fallback assigns self.url_db, not self.conn —
        # code that later reads self.conn would still hit AttributeError.
        # Looks like a copy/paste slip; confirm which attribute is consumed.
        self.url_db = None
    self.db = DB().create('mysql://{0}:{1}@{2}:3306/{3}'.format(
        mysql_username, mysql_password, mysql_ip, mysql_databases))
    self.table = mysql_list_info    # list-info table name
    self.result1 = result1          # list/model-filter (queue) table name
    self.base2 = base2              # detail-info (output) table name
    self.sess = requests.session()
    self.all = {}                   # detailUrl -> queued list-row dict cache
def getIDByName(name):
    """Look up a group row by its name and return the numeric ID column."""
    row = DB().queryOneRow('SELECT * FROM groups WHERE name = %s', (name, ))
    return row['ID']
from flask import Flask, render_template, Response, redirect, request, url_for
from camera import WebcamVideoStream
from plateProcessing import PlateProcessing
from db import DB
from werkzeug.utils import secure_filename
from vehicle import Vehicle
from cctv import CCTV
import cv2
import os
import time

app = Flask(__name__)
db = DB()
cap = WebcamVideoStream()

UPLOAD_FOLDER = ('static/img/uploads')
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg'}
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER


@app.route('/start_stream')
def start_stream():
    """Start the license-plate video stream and redirect to the index page.

    The stored RTSP setting selects the capture source: the sentinel
    string '0' means "no RTSP camera configured", so the default webcam
    is used; any other value is treated as an RTSP source URL.

    Fix: the original compared with ``is '0'`` — identity, not equality.
    That only "worked" through CPython's small-string interning and is
    not guaranteed; ``==`` is the correct comparison.
    """
    rtsp = str(db.getRTSP_cctv())
    if rtsp == '0':
        print(rtsp)
        cap.start()
    else:
        cap.start(src=rtsp)
    print(' * License Plate Processing is started...!!!')
    print(' * RTSP : ' + rtsp)
    return redirect(url_for('index'))
def getNameByID(id):
    """Look up a group row by numeric ID and return its name.

    Fix: the placeholder was ``%d``; DB-API parameter binding (PEP 249
    'format' paramstyle) uses ``%s`` for every type — ``%d`` raises a
    formatting error at execute time.  Sibling helpers (getByID,
    getIDByName) already use ``%s``.
    """
    mdb = DB()
    res = mdb.queryOneRow('SELECT * FROM groups WHERE ID = %s', (id, ))
    return res['name']
def disableForPost(name):
    """Disable a group for posting by resetting first_record_postdate to
    the sentinel value '2000-00-00 00:00:00'.

    Fix: the original passed ``mdb.escapeString(name)`` as a bound
    parameter of a ``%s`` placeholder.  Parameter binding already escapes
    its arguments, so the name was escaped twice and the WHERE clause
    matched (or stored) the wrong value; the raw name is now bound.
    NOTE(review): '2000-00-00' is not a valid calendar date — presumably
    an intentional sentinel relying on permissive SQL modes; confirm.
    """
    mdb = DB()
    mdb.queryOneRow(
        "update groups set first_record_postdate = %s where name = %s",
        ('2000-00-00 00:00:00', name))
def getActiveIDs():
    """Return the ID of every active group, ordered by group name."""
    return DB().query('SELECT ID FROM groups WHERE active = 1 ORDER BY name')
def getAll():
    """Return every group row, each joined with its release count as
    num_releases (0 when the group has no releases), ordered by name."""
    sql = "SELECT groups.*, COALESCE(rel.num, 0) AS num_releases FROM groups LEFT OUTER JOIN (SELECT groupID, COUNT(ID) AS num FROM releases group by groupID) rel ON rel.groupID = groups.ID ORDER BY groups.name"
    return DB().query(sql)
def updateGroupStatus(id, status=0):
    """Set a group's active flag and return a human-readable confirmation.

    status 0 deactivates the group, any other value activates it.
    """
    DB().query('UPDATE groups SET active = %s WHERE id = %s', (status, id))
    if status == 0:
        verb = 'deactivated'
    else:
        verb = 'activated'
    return 'Group %d has been %s' % (id, verb)
def getByName(grp):
    """Return the single group row whose name matches grp."""
    return DB().queryOneRow('SELECT * FROM groups WHERE name = %s', (grp, ))
def getByID(id):
    """Return the single group row with the given numeric ID."""
    return DB().queryOneRow('SELECT * FROM groups WHERE ID = %s', (id, ))
def getActiveByDate():
    """Return all active groups, most recent first_record_postdate first."""
    sql = 'SELECT * FROM groups WHERE active = 1 ORDER BY first_record_postdate DESC'
    return DB().query(sql)
def delete(id):
    """Delete the group row with the given numeric ID.

    Fix: the placeholder was ``%d``; DB-API parameter binding (PEP 249
    'format' paramstyle) uses ``%s`` for every type — ``%d`` raises a
    formatting error at execute time.  Sibling helpers already use ``%s``.
    """
    mdb = DB()
    return mdb.query('delete from groups where ID = %s', (id, ))
def resetall():
    """Reset the backfill/record bookkeeping columns for every group and
    deactivate them all."""
    sql = 'update groups set backfill_target=0, first_record=0, first_record_postdate=null, last_record=0, last_record_postdate=null, last_updated=null, active = 0'
    return DB().query(sql)
def __init__(self): self.modelset=Model()#モデルロードインスタンス生成 self.dblord=DB()#db記録インスタンス生成
def reset(id):
    """Reset one group's backfill/record bookkeeping and deactivate it.

    Fix: the placeholder was ``%d``; DB-API parameter binding (PEP 249
    'format' paramstyle) uses ``%s`` for every type — ``%d`` raises a
    formatting error at execute time.  Sibling helpers already use ``%s``.
    """
    mdb = DB()
    return mdb.query(
        'update groups set backfill_target=0, first_record=0, first_record_postdate=null, last_record=0, last_record_postdate=null, active = 0, last_updated=null where ID = %s',
        (id, ))
class MySpider(spider.Spider):
    """Crawler for the Chongqing public resources trading centre
    (重庆公共资源交易中心).

    Works off a MySQL queue: parse() reads pending list rows (tf = "1")
    for this site and returns their detail URLs; parse_detail_page()
    extracts the announcement's content, parties and price, writes a
    detail row, and flips the queue row's tf flag to "0".
    (Python 2 code: print statements and ``except Exception, e`` syntax.)
    """

    def __init__(self, proxy_enable=False, proxy_max_num=setting.PROXY_MAX_NUM,
                 timeout=setting.HTTP_TIMEOUT, cmd_args=None):
        """Configure site constants and open the redis/MySQL handles.

        Runtime configuration (redis/MySQL endpoints and table names) is
        pulled from a central redis instance at 192.168.1.34/1.
        """
        spider.Spider.__init__(self, proxy_enable, proxy_max_num,
                               timeout=timeout, cmd_args=cmd_args)
        # Site display name (also used verbatim as a filter value in parse()).
        self.siteName = "重庆公共资源交易中心"
        # Category code: 01 news, 02 forum, 03 blog, 04 weibo, 05 print media,
        # 06 wechat, 07 video, 99 search engine.
        self.info_flag = "99"
        # Entry URL list.
        self.start_urls = ["http://www.zgazxxw.com"]
        self.encoding = 'gb2312'
        self.site_domain = 'zgazxxw.com'
        self.dedup_uri = None
        self.headers = {
            # (several other headers — Accept, Cookie, Referer, Host, etc. —
            # were present but commented out in the original)
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36',
        }
        self.request_headers = {'headers': self.headers}
        # Central configuration store: endpoint/table settings live in redis.
        self.conn_config = redis.StrictRedis.from_url('redis://192.168.1.34/1')
        redis_ip = self.conn_config.get("redis_ip")
        redis_db = self.conn_config.get("redis_db")
        mysql_ip = self.conn_config.get("mysql_ip")
        mysql_databases = self.conn_config.get("mysql_databases")
        mysql_username = self.conn_config.get("mysql_username")
        mysql_password = self.conn_config.get("mysql_password")
        mysql_list_info = self.conn_config.get("mysql_table1")
        result1 = self.conn_config.get("mysql_list_model_filter")
        base2 = self.conn_config.get("mysql_detail_info")
        try:
            self.conn = redis.StrictRedis.from_url('redis://{0}/{1}'.format(
                redis_ip, redis_db))
        except:
            # NOTE(review): the fallback assigns self.url_db, not self.conn —
            # code that later reads self.conn would still hit AttributeError.
            # Looks like a copy/paste slip; confirm which attribute is consumed.
            self.url_db = None
        self.db = DB().create('mysql://{0}:{1}@{2}:3306/{3}'.format(
            mysql_username, mysql_password, mysql_ip, mysql_databases))
        self.table = mysql_list_info    # list-info table name
        self.result1 = result1          # list/model-filter (queue) table name
        self.base2 = base2              # detail-info (output) table name
        self.sess = requests.session()
        self.all = {}                   # detailUrl -> queued list-row dict cache

    def get_start_urls(self, data=None):
        """Return the entry-point URL list."""
        return self.start_urls

    def parse(self, response, url):
        """Collect the detail-page links queued for this site.

        Reads all rows still flagged tf = "1" for this site from MySQL,
        caches each row under its detailUrl in self.all, and returns the
        URLs in the framework's (urls, None, None) triple.
        """
        page_urls = []
        urls = self.db.table(self.result1).where(
            '''tf = "1" and siteName = "重庆公共资源交易中心"''').find()
        dict_page_info = [url for url in urls if url is not None]
        for str_urls in dict_page_info:
            dict_post = str_urls
            try:
                detailUrl = dict_post.get("detailUrl")
            except Exception as e:
                print e
            self.all[detailUrl] = dict_post
            page_urls.append(detailUrl)
        return (page_urls, None, None)

    def parse_detail_page(self, response=None, url=None):
        """Extract one announcement's detail record and persist it.

        Parses the page body, pulls out the plain-text content, purchaser
        (tender), winning bidder and price, merges them with the queued
        list-row metadata, inserts the detail row, then marks the queue
        row done (tf = "0").  Returns [] when the page cannot be parsed.
        """
        try:
            response.encoding = self.encoding
            unicode_html_body = response.text
            data = htmlparser.Parser(unicode_html_body)
        except Exception, e:
            return []
        detail_url = response.url
        dict_post = self.all.get(detail_url)
        ctime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print ctime
        if data:
            # Raw HTML of the announcement body (used by the extractors).
            content_xmls = data.xpathall('''//div[@class="list_content fl"]''')
            content_xml = ""
            for i in content_xmls:
                content_xml += i.data
            # Plain-text content of the announcement.
            contents = data.xpathall(
                '''//div[@class="list_content fl"]//text()''')
            content = ''
            for i in contents:
                content += i.text().strip() + " "
            if content == '':
                # Fall back to the site name so the record is never empty.
                content = self.siteName
            content = self.makecontent(content)
            # Purchaser (采购人).
            tender = self.getPurchasingPersonName(content, content_xml)
            # Winning bidder (中标人).
            bidder = self.getPurchasingPerson(content, content_xml)
            # Price (价格).
            price = self.getprice(content, content_xml)
            post = {
                "id": dict_post.get("id"),
                "uuid": dict_post.get("uuid"),              # md5 key
                "detailUrl": detail_url,                    # url
                "name": dict_post.get("name"),              # title
                "location": dict_post.get("location"),      # region
                "publicTime": dict_post.get("publicTime"),  # publish time
                "tag": dict_post.get("tag"),                # tag
                "site": self.site_domain,                   # domain
                "siteName": self.siteName,                  # site display name
                "ctime": ctime,                             # crawl time
                "service": dict_post.get("service"),
                "industry": dict_post.get("industry"),
                "price": price,                             # price
                "tender": tender,                           # purchaser
                "bidder": bidder,                           # winning bidder
                "content": content,
            }
            dic = self.handle_post(post)
            try:
                self.db.table(self.base2).add(dic)
                # Mark the queue row as processed.
                y = {"tf": "0"}
                self.db.table(self.result1).where('''uuid="{0}"'''.format(
                    dict_post.get("uuid"))).update(y)
            except Exception as e:
                print e
def __init__(self, table_name):
    """Initialise by delegating directly to the DB base class.

    Args:
        table_name: name of the table this instance operates on,
            forwarded unchanged to DB.__init__.
    """
    DB.__init__(self, table_name)
import csv import sys import time from datetime import timedelta from config import config from logger import Logger from db import DB TEMP_DIR = config['DATA']['TEMP_DIR'] logger = Logger('ETL') db = DB.getDB() csv.register_dialect('Dialect', quoting=csv.QUOTE_ALL, skipinitialspace=True) class ETL: def __init__(self, name, sources, target, col_name, handler): self._name = name self._sources = sources self._target = target self._col_name = col_name self._handler = handler logger.info(f"{name} - Found '{len(sources)}' files from sources") def run(self): logger.info(f"Start '{self._name}' ETL process") start_time = time.time()
class storageandfication:
    """Download images from a user's Twitter timeline, classify them with
    a model, and keep only the positively-labelled ones.

    The DB records the timestamp of the newest tweet seen per account so
    a later run stops where the previous one left off (no re-downloads).
    """

    # Constructor: build the collaborator instances.
    def __init__(self):
        # Model-loading instance (image classifier).
        self.modelset=Model()
        # DB-recording instance (last-seen tweet timestamps).
        self.dblord=DB()

    # Image-saving method.
    def download_img(self,url,file_name):
        """Stream the image at `url` to disk as `file_name`."""
        r = requests.get(url, stream=True)
        if r.status_code == 200:
            with open(file_name, 'wb') as f:
                # Ensure gzip/deflate content is decoded before copying.
                r.raw.decode_content = True
                shutil.copyfileobj(r.raw, f)

    # Automatic save-and-classify method.
    def mainprosesing(self,Account):
        """Walk `Account`'s timeline, save tweet images, classify each one,
        and delete images the model labels 'no'.

        Stops as soon as a tweet older than the DB-recorded timestamp is
        reached, and records the newest tweet's time on the first hit so
        the next run does not re-save the same images.
        """
        # Destination folder for this account's images.
        path='img/'+Account+'/'
        if os.path.exists(path):
            pass
        else:
            os.mkdir(path)
        # Timestamp of the newest tweet processed in a previous run
        # (if this account is already recorded in the DB).
        oldtime=self.dblord.dbmach(Account)
        print("時間:"+str(oldtime))
        # api.user_timeline() returns at most 200 tweets per page and the
        # API caps history at 3200 tweets, i.e. 16-17 pages; this list is
        # used to step through the page numbers.
        pages=[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17]
        # Flags used for control flow inside the nested loops.
        fasttweetflag=False   # True once the newest tweet time is recorded
        timeflag=False        # True once we reach already-processed tweets
        api=login.login("自分のユーザーID")  # log in to the API
        # Loop over timeline pages fetching tweets (with images).
        for page in pages:
            # Account: the user to search; count: tweets per page (max 200);
            # page: the page number.
            tweets=api.user_timeline(Account, count=200, page=page)
            for tweet in tweets:
                try:
                    tweet.created_at+=timedelta(hours=9)  # shift to JST
                    print(tweet.created_at)
                    if tweet.created_at <= oldtime:
                        # Older than the DB-recorded time: stop the loop.
                        print("roop_end")
                        timeflag=True
                        break
                    if fasttweetflag==False:
                        # Record the newest tweet's time once per run so the
                        # next access does not save duplicates.
                        print("dblogwrit")
                        self.dblord.dbmemo(Account,tweet.created_at)
                        fasttweetflag=True
                    url=tweet.extended_entities['media'][0]['media_url']
                    tdatetime = dt.now()
                    # Save under a timestamp-based name in the account folder.
                    filename=path+tdatetime.strftime('%Y%m%d%H%M%S')+'.jpg'
                    self.download_img(url,filename)
                    # Classify the saved image.
                    label=self.modelset.modellabel(filename)
                    # Keep only images labelled 'yes'; delete 'no' images.
                    if label=='no':
                        os.remove(filename)
                    else:
                        print("ラベル:yes")
                        # Filenames have 1-second resolution: wait so the
                        # next file does not overwrite this one.
                        time.sleep(1)
                except:
                    # NOTE(review): bare except silently skips tweets without
                    # media — but it also hides every other error (API,
                    # network, model); consider narrowing.
                    pass
            if timeflag:
                break