def crTrieIndex():
    """Build forward and reverse prefix-tree (trie) indexes for a dictionary.

    Reads the dictionary name (and file list) from the POST form, builds both
    tries from the on-disk inverted index, flags the dictionary row in the DB,
    and saves the tries to files.

    Returns:
        dict: {'success': True, 'time': build seconds} on success.
    Aborts with HTTP 500 on any failure (original contract).
    """
    name = request.form['name']
    files = request.form.getlist('files[]')
    files = [f for f in files if f != ""]  # drop empty entries from the form
    TRIEINDEX = '../../Model/trieIndex/'
    INVERTINDEXPATH = '../../Model/invertIndex/'
    try:
        startTime = time()
        trieIndex, reverseTrieIndex = createPrefixTree(INVERTINDEXPATH, name)
        resTime = time() - startTime
        # Parameterized query: `name` comes from the request and must not be
        # concatenated into SQL (injection risk in the original code).
        query = "UPDATE `dictionary` SET `trieIndex`=1 WHERE `name`=%s"
        con = connection.getConnection()
        with con:
            cur = con.cursor()
            cur.execute(query, (name,))
            con.commit()
        trieIndex.save(TRIEINDEX + name)
        reverseTrieIndex.save(TRIEINDEX + "r_" + name)
        return {'success': True, 'time': resTime}
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # still propagate; error contract (HTTP 500) unchanged.
        abort(500)
def crInvertIndex():
    """Build the inverted index and its term-document incidence matrix.

    Reads the dictionary name and book file list from the POST form, builds
    the inverted index and matrix, flags the dictionary row in the DB, then
    writes both structures to JSON files.

    Returns:
        dict: {'success': True, 'time': build seconds}.
    """
    name = request.form['name']
    files = request.form.getlist('files[]')
    files = [f for f in files if f != ""]  # drop empty entries from the form
    FILEPATH = '../../Model/books/'
    INVERTINDEXPATH = '../../Model/invertIndex/'
    MATRIXPATH = '../../Model/matrix/'
    startTime = time()
    invertIndex = createInvertIndex(FILEPATH, files, 0)
    confMatrix = createMatrixFromIndex(invertIndex, len(files))
    resTime = time() - startTime
    # Parameterized query instead of concatenating request data into SQL.
    query = "UPDATE `dictionary` SET `invertIndex`=1 WHERE `name`=%s"
    con = connection.getConnection()
    with con:
        cur = con.cursor()
        cur.execute(query, (name,))
        con.commit()
    with codecs.open(INVERTINDEXPATH + name, 'w', 'utf-8') as f:
        json.dump(invertIndex, f, ensure_ascii=False)
    with codecs.open(MATRIXPATH + name, 'w', 'utf-8') as f:
        json.dump(confMatrix, f, ensure_ascii=False)
    return {'success': True, 'time': resTime}
def crCoordIndex():
    """Build the coordinate (positional) index for a dictionary.

    Reads the dictionary name and file list from the POST form, builds the
    coordinate index, flags the dictionary row in the DB, and writes the
    index to a JSON file.

    Returns:
        dict: {'success': True, 'time': build seconds} on success.
    Aborts with HTTP 500 on any failure (original contract).
    """
    name = request.form['name']
    files = request.form.getlist('files[]')
    files = [f for f in files if f != ""]  # drop empty entries from the form
    FILEPATH = '../../Model/books/'
    COORDINDEX = '../../Model/coordIndex/'
    DICTIONARYPATH = '../../Model/dictionaries/'
    try:
        startTime = time()
        coordIndex = getCoordIndex(FILEPATH, DICTIONARYPATH, name, files)
        resTime = time() - startTime
        # Parameterized query instead of concatenating request data into SQL.
        query = "UPDATE `dictionary` SET `coordIndex`=1 WHERE `name`=%s"
        con = connection.getConnection()
        with con:
            cur = con.cursor()
            cur.execute(query, (name,))
            con.commit()
        with codecs.open(COORDINDEX + name, 'w', 'utf-8') as f:
            json.dump(coordIndex, f, ensure_ascii=False)
        return {'success': True, 'time': resTime}
    except Exception:
        # Narrowed from a bare `except:`; error contract (HTTP 500) unchanged.
        abort(500)
def writeToDBAndToFile(name, allWords, uniqueWords, collectionSize,
                       timeToCreate, booksNum, ids):
    """Insert one summary row for a newly created dictionary.

    Args:
        name: dictionary name (user-supplied).
        allWords: total word count across the collection.
        uniqueWords: distinct word count.
        collectionSize: total size of the source files.
        timeToCreate: build duration in seconds.
        booksNum: number of books in the collection.
        ids: space-separated book-id string.
    """
    con = connection.getConnection()
    with con:
        cur = con.cursor()
        # Parameterized INSERT: the original concatenated all values into the
        # SQL string, which is injectable and breaks on quotes in `name`.
        query = ("INSERT INTO `dictionary`"
                 "(`name`, `allWords`, `uniqueWords`, `collectionSize`, "
                 "`timeToCreate`, `booksNum`, `ids`) "
                 "VALUES (%s, %s, %s, %s, %s, %s, %s)")
        cur.execute(query, (name, allWords, uniqueWords, collectionSize,
                            timeToCreate, booksNum, ids))
        con.commit()
def read_sql(ImageID, VarName):
    """Fetch one field for an image via the `sp_join_skyghi` stored procedure.

    Args:
        ImageID: key passed to the stored procedure.
        VarName: column name to extract from the first result row.

    Returns:
        The requested value, or the string 'NaN' on any failure
        (original best-effort contract).
    """
    try:
        conn = getConnection()
        try:
            cur = conn.cursor()
            cur.callproc("sp_join_skyghi", [ImageID])
            result = cur.fetchall()[0][VarName]
            return result
        finally:
            # Original leaked the connection on the error path; always close.
            conn.close()
    except Exception:
        # Narrowed from a bare `except:`; 'NaN' sentinel kept for callers.
        return 'NaN'
def getCreatedDicts():
    """Report which index types have been built for a named dictionary.

    Reads `name` from the query string and returns a dict mapping a 1-based
    counter to each matching `dictionary` row (original response shape).
    """
    name = request.args['name']
    con = connection.getConnection()
    with con:
        cur = con.cursor()
        # Parameterized query instead of concatenating request data into SQL.
        cur.execute(
            "SELECT `dict`, `invertIndex`, `coordIndex`, `2WordsIndex`, "
            "`gramIndex`, `permutationIndex`, `trieIndex`, "
            "`invertIndexByParts` FROM `dictionary` WHERE `name`=%s",
            (name,))
        rows = cur.fetchall()
        # 1-based numbering preserved from the original manual counter loop.
        res = {i: row for i, row in enumerate(rows, start=1)}
    return res
def createDictionary():
    """Create a new dictionary from the selected books and record its stats.

    Flow: reject duplicate names, resolve the posted book ids to file names,
    build the word list/statistics, persist the summary row, and return the
    stats as JSON.

    Returns:
        (json_body, 200, headers) on success, or the plain string
        "Dict is already exist" when the name is taken (original contract).
    """
    # get params
    name = request.form['name']
    ids = request.form.getlist('id[]')
    con = connection.getConnection()
    files = []
    # check if dictionary with this name is already exist
    cur = con.cursor()
    # Parameterized queries throughout: `name` and `ids` are request data and
    # must not be concatenated into SQL (injection risk in the original).
    cur.execute("SELECT `name` FROM `dictionary` WHERE `name`=%s", (name,))
    row = cur.fetchone()
    if row is not None:
        return "Dict is already exist"
    # get books: resolve each posted id to its file name
    for i in ids:
        with con:
            cur.execute("SELECT * FROM `books` WHERE id=%s", (i,))
            row = cur.fetchone()
            files.append(row['name'])
    FILEPATH = '../../Model/books/'
    # get all dicts and lists for record
    startTime = time()
    uniqueWords, sizeOfFiles, uniqueWordsNumber, allWordsNumber = \
        createWordsListAndInfo(FILEPATH, files)
    resTime = time() - startTime
    # record all information
    writeToDBAndToFile(name, allWordsNumber, uniqueWordsNumber, sizeOfFiles,
                       resTime, len(files), ' '.join(ids))
    return json.dumps({
        'success': True,
        'name': name,
        'size': sizeOfFiles,
        'allWords': allWordsNumber,
        'uniqueWords': uniqueWordsNumber,
        'time': resTime,
        'booksNum': len(files)
    }), 200, {
        'ContentType': 'application/json'
    }
def cr2WordIndex():
    """Build the two-word (biword) index for a dictionary.

    Reads the dictionary name and file list from the POST form, builds the
    biword index, flags the dictionary row in the DB, and writes the index
    to a JSON file.

    Returns:
        dict: {'success': True, 'time': build seconds}.
    """
    name = request.form['name']
    files = request.form.getlist('files[]')
    files = [f for f in files if f != ""]  # drop empty entries from the form
    FILEPATH = '../../Model/books/'
    WORDS2INDPATH = '../../Model/words2Index/'
    startTime = time()
    words2Ind = create2wordsIndex(FILEPATH, files)
    resTime = time() - startTime
    # Parameterized query instead of concatenating request data into SQL.
    query = "UPDATE `dictionary` SET `2WordsIndex`=1 WHERE `name`=%s"
    con = connection.getConnection()
    with con:
        cur = con.cursor()
        cur.execute(query, (name,))
        con.commit()
    with codecs.open(WORDS2INDPATH + name, 'w', 'utf-8') as f:
        json.dump(words2Ind, f, ensure_ascii=False)
    return {'success': True, 'time': resTime}
def crDict():
    """Build the plain word dictionary (unique word list) for a collection.

    Reads the dictionary name and file list from the POST form, builds the
    word list, flags the dictionary row in the DB, and writes the list to a
    JSON file.

    Returns:
        dict: {'success': True, 'time': build seconds}.
    """
    name = request.form['name']
    files = request.form.getlist('files[]')
    files = [f for f in files if f != ""]  # drop empty entries from the form
    FILEPATH = '../../Model/books/'
    DICTIONARYPATH = '../../Model/dictionaries/'
    startTime = time()
    uniqueWords = createWordsList(FILEPATH, files)
    resTime = time() - startTime
    # Parameterized query instead of concatenating request data into SQL.
    query = "UPDATE `dictionary` SET `dict`=1 WHERE `name`=%s"
    con = connection.getConnection()
    with con:
        cur = con.cursor()
        cur.execute(query, (name,))
        con.commit()
    with codecs.open(DICTIONARYPATH + name, 'w', 'utf-8') as f:
        json.dump(uniqueWords, f, ensure_ascii=False)
    # Diagnostic output kept from the original implementation.
    print(memory_usage())
    return {'success': True, 'time': resTime}
def getServerSocket():
    """Lazily create and return the process-wide shared connection.

    The module-global `_server_socket` acts as a singleton cache: the
    connection is opened on first use and reused on every later call.
    """
    global _server_socket
    if _server_socket:
        return _server_socket
    _server_socket = connection.getConnection()
    return _server_socket
import connection import groups import users import logging if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) import sys dni = sys.argv[1] office = sys.argv[2] logging.info('agregando {} a la oficina {}'.format(dni, office)) con = connection.getConnection() try: selectedOffice = None offices = groups.OfficeDAO.findAll(con) for oid in offices: off = groups.OfficeDAO.findById(con, oid) if off.name == office: selectedOffice = off break if selectedOffice is None: logging.info('La oficina no existe') sys.exit(1) uid = users.UserDAO.findByDni(con, dni)[0] if uid not in selectedOffice.users:
class fighterSpider(scrapy.Spider): conn = connection.getConnection() name = 'event' allowed_domains = ['www.bestfightodds.com'] start_urls = [ 'https://www.bestfightodds.com/events/ufc-231-holloway-vs-ortega-1584' ] def parse(self, response): print '===============================================================' event = {} event['name'] = response.css( 'div.table-outer-wrapper div.table-div div.table-header a::text' ).extract_first() event['date'] = response.css( 'div.table-outer-wrapper div.table-div div.table-header span.table-header-date::text' ).extract_first() fighters = {} fighters['name'] = response.css( 'div.table-outer-wrapper div.table-div div.table-inner-wrapper div.table-scroller table.odds-table tbody th span.tw::text' ).extract() if self.conn is not None: # save event info in database event_id = self.save_event(self.conn, event) # save fighters info in database fighters_ids = self.save_fighters(self.conn, fighters) # save event fighter mapping self.event_fighter_mapping(self.conn, event_id, fighters_ids) self.conn.close() print 'Event and Fighters extracted and saved in database' else: print 'Not able to connect to database' print '===============================================================' def save_event(self, conn, event): event_name = event.get('name') event_date = event.get('date') cursor = conn.cursor() query = "select event_id,event_name from events where event_name=%s" cursor.execute(query, (event_name, )) result = cursor.fetchone() if result < 0: query = "INSERT INTO events (event_name,event_date) VALUES (%s,%s)RETURNING event_id;" try: cursor.execute(query, (event_name, event_date)) event_id = cursor.fetchone()[0] return event_id except: print 'Error in executing query' traceback.print_exc(file=sys.stdout) cursor.close() conn.commit() else: print "Event already exists in the database!!!" 
event_id = result[0] return event_id def save_fighters(self, conn, fighters): fighter_name = fighters.get('name') fighter_coun = len(fighter_name) fighters_ids = [] cursor = conn.cursor() if fighter_coun > 0: for i in range(fighter_coun): fighters_name = fighter_name[i] query = "select fighter_id,fighter_name from fighters where fighter_name=%s" cursor.execute(query, (fighters_name, )) result = cursor.fetchone() if result < 0: query = "INSERT INTO fighters (fighter_name) VALUES (%s)RETURNING fighter_id;" try: cursor.execute(query, (fighters_name, )) fighters_id = cursor.fetchone()[0] fighters_ids.append(fighters_id) except: print 'Error in executing query' traceback.print_exc(file=sys.stdout) else: fighters_id = result[0] fighters_ids.append(fighters_id) cursor.close() conn.commit() return fighters_ids def event_fighter_mapping(self, conn, event_id, fighters_ids): event_id = event_id fighters_id = fighters_ids fighters_id_count = len(fighters_id) cursor = conn.cursor() for i in range(fighters_id_count): fighter_id = fighters_id[i] query = "select fighter_id,event_id from event_fighter_mapping where fighter_id = %s AND event_id = %s" cursor.execute(query, (str(fighter_id), str(event_id))) result = cursor.fetchone() if result < 0: query = "INSERT INTO event_fighter_mapping (fighter_id,event_id) VALUES (%s,%s)" try: cursor.execute(query, (fighter_id, event_id)) except: print 'Error in executing query' traceback.print_exc(file=sys.stdout) cursor.close() conn.commit()
# Import packages and libraries
import datetime
import glob
import re
import connection

# Load today's converted station text files into their per-station MySQL
# tables via LOAD DATA LOCAL INFILE.

# Search for files and paths for current date
root = '/mnt/nas/CMS_Data'
today = datetime.date.today()
year = today.strftime("%Y")
month = today.strftime("%Y-%m")
day = today.strftime("%Y-%m-%d")
files = glob.glob("%s/StationsData_*/*/Converted to TXT/%s/%s/*%s.txt"
                  % (root, year, month, day))

# NOTE: table names cannot be bound as DB-API parameters; the table suffix is
# taken from a digits-only regex match on our own NAS paths, which bounds the
# injection risk of the %-interpolation below.
load_sql = ("LOAD DATA LOCAL INFILE '%s' "
            "REPLACE INTO TABLE %s "
            "FIELDS TERMINATED BY '\t' "
            "LINES TERMINATED BY '\n' "
            "IGNORE 1 LINES; ")

# Load text files to MySQL DB.
# Renamed the handle to `con`: the original rebound the name `connection`,
# shadowing the imported module.
con = connection.getConnection()
try:
    cursor = con.cursor()
    for file in files:
        print(file)
        # Station number appears in the file name as "[NNN]".
        station = re.findall(r'\[([0-9]+)\]', file)[0]
        table = 'SIN' + station
        print(table)
        cursor.execute(load_sql % (file, table))
    con.commit()
finally:
    # Always release the connection, even if a load fails mid-run.
    con.close()
def insertDB(self):
    """Persist this item's url, name, price and email into `items1`.

    Uses a parameterized INSERT: the original built the SQL with an f-string,
    which is injectable and breaks on quotes in any of the values.
    """
    sql = ('INSERT INTO items1 (url, name, Price, email) '
           'VALUES (%s, %s, %s, %s)')
    cr = connection.getCursor()
    cr.execute(sql, (self.url, self.name, self.price, self.email))
    db = connection.getConnection()
    db.commit()
def boolSearch():
    """Boolean search over the term-document incidence matrix.

    Parses the posted request string into operand words and the operators
    `&`, `|`, `^`; resolves each operand to an int bitmask (one bit per
    book) via the matrix, phrase search, negation, or joker (wildcard)
    search; folds the masks left-to-right; and maps the set bits back to
    book names from the DB.

    Returns:
        (json_body, 200, headers) where json_body contains the matched
        book names (plus the joker word list, when a wildcard was used)
        and the elapsed time.
    """
    # get request params
    timeStart = time()
    name = request.form['name']
    req = request.form['request'].lower()
    booksNum = int(request.form['num'])
    # Load the incidence matrix: word -> bitstring over books.
    # (Original left this file handle open.)
    with codecs.open('../../Model/matrix/' + name, 'r', 'utf-8') as f:
        matrix = json.load(f)  # renamed from `dict`, which shadowed the builtin
    # get operations and words
    words = re.findall(r"[^&|\^]+", req)
    words = list(map(replaceSpace, words))
    operations = re.findall(r"[&|\^]", req)
    resWords = None
    # Resolve every operand to an int bitmask.
    for i in range(len(words)):
        words[i] = replaceSpace(words[i])
        if words[i].count(" ") > 0:
            # Multi-word operand -> phrase search.
            words[i] = fraseSearch(words[i], name, booksNum)
        elif words[i].startswith("!"):
            # Negation: flip every bit of the word's incidence vector
            # (0<->1 via an intermediate '2' so the swaps don't collide).
            words[i] = words[i].replace("!", "")
            words[i] = int(
                matrix[words[i]].replace("1", "2").replace("0", "1").replace(
                    "2", "0"), 2)
        elif "*" in words[i]:
            # Wildcard -> joker search, which also reports the matched words.
            words[i], resWords = joker(words[i], name, booksNum)
        else:
            words[i] = int(matrix[words[i]], 2)
    res = words[0]
    # Fold the remaining operands left-to-right with their operators.
    for i in range(len(operations)):
        if operations[i] == "&":
            res &= words[i + 1]
        if operations[i] == "|":
            res |= words[i + 1]
        if operations[i] == "^":
            res ^= words[i + 1]
    # Left-pad the binary result back to one character per book.
    res = bin(res).replace("0b", "").zfill(booksNum)
    con = connection.getConnection()
    cur = con.cursor()
    # Parameterized queries instead of concatenating request data into SQL.
    cur.execute("SELECT `ids` FROM `dictionary` WHERE `name` = %s", (name,))
    row = cur.fetchone()['ids'].split(" ")
    # Map each set bit to its book name.
    names = []
    for i in range(len(res)):
        if res[i] == "1":
            cur.execute("SELECT `name` FROM `books` WHERE `id` = %s",
                        (row[i],))
            names.append(cur.fetchone()['name'])
    if resWords is not None:
        # BUG FIX: the original ran `print(names.append(resWords))`, which
        # unconditionally appended resWords — None unless a joker term was
        # used — into the returned names list.
        names.append(resWords)
    resTime = time() - timeStart
    return json.dumps({
        'success': True,
        'names': names,
        'time': resTime
    }), 200, {
        'ContentType': 'application/json'
    }
def crInvertIndexByParts():
    """Build an inverted index in size-bounded parts, then k-way merge them.

    Phase 1 splits the book list into chunks of roughly SIZE_LIMIT bytes and
    writes one "word:postings" line per term per partial file. Phase 2 merges
    the sorted partial files into a single JSON-style index file, combining
    postings for terms that appear in several parts, then deletes the parts.

    Returns:
        dict: {'success': True, 'time': total seconds}.
    """
    name = request.form['name']
    files = request.form.getlist('files[]')
    files = [f for f in files if f != ""]  # drop empty entries from the form
    SIZE_LIMIT = 10000000  # approx. bytes of source text per partial index
    FILEPATH = '../../Model/books/'
    INVERTINDEXBYPARTSPATH = '../../Model/invertIndexByParts/'
    count = 0
    startTime = time()
    step = 0
    # Phase 1: build the partial index files.
    while count < len(files):
        sizeCur = 0
        filesCur = []
        step += 1
        startBook = count
        while sizeCur < SIZE_LIMIT and count < len(files):
            sizeCur += os.path.getsize(FILEPATH + files[count])
            filesCur.append(files[count])
            count += 1
        invertIndex = createInvertIndex(FILEPATH, filesCur, startBook)
        with codecs.open(INVERTINDEXBYPARTSPATH + name + "_" + str(step),
                         'w', 'utf-8') as out_file:
            out_file.write("".join(
                str(k) + ":" + str(v) + "\n" for k, v in invertIndex.items()))
    # Phase 2: merge the partial files into one index file.
    readers = [
        codecs.open(INVERTINDEXBYPARTSPATH + name + "_" + str(i + 1), 'r',
                    'utf-8') for i in range(step)
    ]
    with codecs.open(INVERTINDEXBYPARTSPATH + name, 'w', 'utf-8') as output:
        output.write("{")
        # BUG FIX: the original kept `lines` and `readers` as parallel lists
        # and did `lines.remove("")` when a file was exhausted, which shifted
        # the indices and paired lines with the wrong readers afterwards.
        # Keeping each pending line WITH its reader avoids that.
        pending = [[r.readline().replace("\n", ""), r] for r in readers]
        pending = [p for p in pending if p[0] != ""]
        first = True
        while pending:
            # Smallest current word across all still-open partial files.
            currWord = min(p[0] for p in pending).split(":")[0]
            arr = []
            for p in pending:
                if p[0].split(":")[0] == currWord:
                    arr += json.loads(p[0].split(":")[-1])
                    p[0] = p[1].readline().replace("\n", "")
            arr.sort()
            if not first:
                output.write(", ")
            output.write('"' + currWord + '": ' + str(arr))
            first = False
            pending = [p for p in pending if p[0] != ""]
        output.write("}")
    # Close and delete the partial files.
    for i in range(step):
        readers[i].close()
        os.remove(INVERTINDEXBYPARTSPATH + name + "_" + str(i + 1))
    resTime = time() - startTime
    # Parameterized query instead of concatenating request data into SQL.
    query = "UPDATE `dictionary` SET `invertIndexByParts`=1 WHERE `name`=%s"
    con = connection.getConnection()
    with con:
        cur = con.cursor()
        cur.execute(query, (name,))
        con.commit()
    return {'success': True, 'time': resTime}