def main():
    root = utils.get_root_path(False)
    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    parser.add_option('--learning_rate_rbm', action='store', type='string', dest='learning_rate_rbm')
    parser.add_option('--epochs_rbm', action='store', type='string', dest='epochs_rbm')
    parser.add_option('--batch_size', action='store', type='string', dest='batch_size')
    parser.add_option('--data_set', action='store', type='string', dest='data_set')
    (opts, args) = parser.parse_args()
    file_data = ReadFile.ReadFile(root + '/NSL_KDD-master', opts=opts).get_data()
    data_pp = preprocess.Preprocess(file_data).do_predict_preprocess()
    dbn_model.DBN(data_pp).do_dbn('yadlt', opts=opts)
    dbn_model.DBN(data_pp).do_dbn_with_weight_matrix(root + '/save')
    model.do_svm()
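# Hypothetical command line for the script above (option values are illustrative;
# the parser stores all of them as strings):
# python main.py --learning_rate_rbm 0.01 --epochs_rbm 10 --batch_size 64 --data_set NSL_KDD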
def generate_bulk_insert_data(self, file_folder, num):
    read_file = ReadFile.ReadFile(file_folder)
    # Turn each list of JSON records into an Elasticsearch bulk payload by
    # prepending an action line to every document. The target index is given in
    # the request URL, which keeps the per-action metadata small.
    action = {"index": {}}
    bulk_data = ""
    for data_list in read_file.get_data(num):
        # Each incoming item is a JSON string taken from the source file.
        # Duplicate some fields: load the JSON into a dict, add the extra
        # fields, then dump it back to JSON.
        for data in data_list:
            try:
                data_map = json.loads(data)
                data_map['title1'] = data_map['title']
                data_map['tag'] = {"input": data_map['title']}
                data_map['text1'] = data_map['text']
                data_map['timestamp'] = time.mktime(
                    time.strptime(data_map['time'], '%Y-%m-%d %H:%M:%S'))
                bulk_data += json.dumps(action, ensure_ascii=False) + '\n' + \
                             json.dumps(data_map, ensure_ascii=False) + '\n'
            except (ValueError, KeyError):
                # Skip records that are not valid JSON or miss a required field.
                continue
        bulk_data += '\n'
        yield bulk_data
        bulk_data = ""
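# The payloads yielded above are newline-delimited action/source pairs, which is
# the format Elasticsearch's _bulk endpoint expects. A minimal usage sketch,
# assuming the `requests` library and a local node (URL, index name, folder and
# batch size are all illustrative):
import requests

def bulk_index_sketch(loader, folder='./news', batch=1000,
                      url='http://localhost:9200/news/_bulk'):
    for payload in loader.generate_bulk_insert_data(folder, batch):
        # Each payload already ends with a newline, as _bulk requires.
        resp = requests.post(url, data=payload.encode('utf-8'),
                             headers={'Content-Type': 'application/x-ndjson'})
        resp.raise_for_status()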
def Remove(self, key):
    locate_file = ReadFile()
    record = locate_file.readFile("data.txt")
    for year in self.hashTable:
        # Iterate over a copy: removing from the list being iterated skips elements.
        for each in list(year):
            if each[-2:] == key:
                year.remove(each)
                year.append('-1')  # '-1' marks a free slot
def main(self):
    # Initialize the log file format
    logging.basicConfig(handlers=[logging.FileHandler('LogFile.log', 'w', 'utf-8')],
                        level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s')
    logging.info('Started')
    # Create the file reader
    read = ReadFile()
    # Empty lists and paths for the files in the working directory
    Polls = []
    BYSLIST = ""
    tempList = os.listdir(os.getcwd())
    # For each .csv file: if it is an answer key, read it; otherwise treat it as a poll
    for files in tempList:
        if files.endswith(".csv"):
            if "AnswerKey" in files:
                read.readAnswerKey(files)
            else:
                Polls.append(files)
        # A file ending with .xls is the student list
        elif files.endswith(".xls"):
            BYSLIST = files
    # Read the student list
    students = read.readStudentFile(BYSLIST)
    # Track which polls are actually used
    usedPolls = []
    for report in Polls:
        # Read the report files and record the polls they use
        read.readReportFile(report, usedPolls)
    # Match each student's answers against the correct answers
    for student in students:
        student.setCorrectAnswer()
        student.toString()
    # Write every student's result for each poll
    write = WriteOutput(students, usedPolls)
    write.writeGlobalResult()
    # Compute metrics and write the results to the global file
    metric = Metric(read)
    metric.calculateMetrics(usedPolls)
    # Per-question results, pie charts, and histograms in the Excel file
    metric.calculateQuestionDistrubition(usedPolls)
    # Write attendance
    write.writeAttandence(metric)
def run_index():
    # Run a full index build.
    global docs_path
    global postings_path
    global is_stemming
    global indexer
    global dict_cache_path
    try:
        # Validate the corpus and postings directories before starting.
        if (not check_corpus_directory(docs_path.get())) or (not check_postings_directory(postings_path.get())):
            return
        result = tkMessageBox.askquestion("Run Index",
                                          "Are you sure?\n Don't worry if the GUI"
                                          " is stuck or not responding - it is working",
                                          icon='warning')
        if result != 'yes':
            return
        print ('START TIME - ' + time.strftime("%H:%M:%S"))
        start_time = datetime.now()
        # Reset the project's in-memory state.
        if (globs.main_dictionary is not None) and (bool(globs.main_dictionary)):
            globs.main_dictionary.clear()
        if (globs.cache is not None) and (bool(globs.cache)):
            globs.cache.clear()
        if (globs.documents_dict is not None) and (bool(globs.documents_dict)):
            globs.documents_dict.clear()
        # Start indexing.
        globs.stop_words = load_stop_words(docs_path.get())
        indexer = Indexer.Indexer(postings_path.get(), is_stemming.get())
        read_file = ReadFile.ReadFile(get_corpus_dir(docs_path.get()), indexer,
                                      globs.constants, globs.stop_words, is_stemming.get())
        read_file.index_folder()
        globs.num_of_documents = len(read_file.documents_dict)
        globs.documents_dict = read_file.documents_dict
        del read_file
        indexer.unite_temp_postings()
        globs.main_dictionary = indexer.main_dict
        indexer.build_document_weight(globs.documents_dict)
        # To print stats, uncomment this:
        # with open('{}{}'.format('stats', 'stem' if is_stemming.get() else ''), 'w') as my_stats_file:
        #     my_stats_file.write('term,tf,df\n')
        #     for key, val in main_dictionary.iteritems():
        #         my_stats_file.write('{},{},{}\n'.format(key, val.tf, val.df))
        globs.cache = indexer.cache_dict
        globs.average_doc_size = globs.average_doc_size / globs.num_of_documents
        dict_cache_path = postings_path
        print ('END TIME - ' + time.strftime("%H:%M:%S"))
        end_time = datetime.now()
        print_stats_at_end_of_indexing(end_time - start_time)
    except Exception as err:
        tkMessageBox.showinfo('ERROR', err)
        traceback.print_exc(file=stdout)
def Get(self, data_value):
    assert data_value is not None, "Data value must be valid."
    locate_file = ReadFile.ReadFile()
    locate_file.readFile('data.txt')
    # Hash the value (minus its two-character prefix) and keep the low three
    # bits of the hash as the bucket index.
    hash_value = int(bin(self.__BKDRHash(str(data_value)[2:]))[-3:], base=2)
    candidates = self.table[hash_value]
    results = []
    for idx in range(len(candidates)):
        if data_value == candidates[idx][0]:
            results.append(candidates[idx][1])
    # Map the matching record numbers back to lines of the data file,
    # dropping each line's trailing character.
    return [' '.join(locate_file.content[int(i)])[:-1] for i in results]
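# __BKDRHash is not defined in this snippet. For illustration only, a minimal
# sketch of the classic BKDR string hash it presumably implements (seed 131 is
# the conventional choice; the project's actual implementation may differ):
def _bkdr_hash_sketch(s, seed=131):
    h = 0
    for ch in s:
        # Multiply-and-add over the characters, masked to stay in 31-bit range
        # like the usual C implementation.
        h = (h * seed + ord(ch)) & 0x7FFFFFFF
    return h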
def parserForNltk(self, path):
    words = ""
    read = ReadFile.ReadFile()
    lines = read.readCsvFile(path)
    line = lines.readline()
    while line:
        # Collect the lines between the "Código" header and the "Destino" footer.
        if "Código" in line:
            line = lines.readline()
            # Also stop at EOF so a truncated file cannot loop forever.
            while line and "Destino" not in line:
                line = line.replace("\n", "")
                words = words + " " + line
                line = lines.readline()
        line = lines.readline()
    lines.close()
    return words
def translatefile(filename):
    inputfile = filename
    iReadStart = getReadStart(filename)
    iFileIndex = getFileIndex(filename)
    readObj = ReadFile(inputfile)
    iFileSize = readObj.filesize()
    (iPos, data) = readObj.readvalue(iReadStart, 12)
    while len(data) > 0:
        oldFile = getoldfile()
        if iReadStart == 0:
            newName = convert2filename(iFileSize, 0, filename)
        else:
            newName = convert2filename(iPos, iFileIndex, data)
        # Wait for the next source file to appear before renaming it.
        while len(oldFile) == 0:
            sleep(1000)
            oldFile = getoldfile()
        convert2newfile(oldFile, newName)
        # Advance to the next value; the original never re-read, so the loop
        # could not terminate. This re-read assumes readvalue(position, length)
        # returns the next position along with the data.
        (iPos, data) = readObj.readvalue(iPos, 12)
def parseItem(self, path, counter):
    self.cestaDAO.retrieveAll(counter)
    self.dateDAO.retrieveAll()
    lojaId = self.lojaDAO.lojaDict[counter]
    read = ReadFile.ReadFile()
    lines = read.readCsvFile(path)
    line = lines.readline()
    while line:
        # A sale ("[S]") or discounted sale ("[DS]") row identifies the basket,
        # date and operation for the item rows that follow.
        if "[S]" in line or "[DS]" in line:
            operacaoId = self.operacaoDAO.wichOperacao(line)
            line = re.sub(' +', ' ', line)
            line = line.split(' ')
            cestaId = self.cestaDAO.cestaDict[int(line[2])]
            date = line[0].replace("\"", "")
            dateId = self.dateDAO.dateDict[date]
        # Item rows sit between the "Código" header and the "Destino" footer.
        if "Código" in line:
            line = lines.readline()
            # Also stop at EOF so a truncated file cannot loop forever.
            while line and "Destino" not in line:
                colecaoId = self.colecaoDAO.wichColecao(line)
                corId = self.corDAO.wichCor(line)
                estadoId = self.estadoDAO.wichEstado(line)
                faixaEtariaId = self.faixaEtariaDAO.wichFaixaEtaria(line)
                generoId = self.generoDAO.wichGenero(line)
                materialId = self.materialDAO.wichMaterial(line)
                modeloId = self.modeloDAO.wichModelo(line)
                tamanhoId = self.tamanhoDAO.wichTamanho(line)
                itemId = self.itemDAO.wichItem(line)
                tipoId = self.tipoDAO.wichTipo(self.itemDAO.wichTipo(itemId))
                line = re.sub(' +', ' ', line)
                line = line.split(' ')
                valor = line[len(line) - 3]
                qtd = line[len(line) - 4]
                if itemId != 0:
                    self.compraDAO.saveCompra(lojaId, cestaId, dateId, operacaoId,
                                              colecaoId, corId, estadoId,
                                              faixaEtariaId, generoId, materialId,
                                              modeloId, tamanhoId, itemId, tipoId,
                                              valor, qtd)
                line = lines.readline()
        line = lines.readline()
    lines.close()
    self.compraDAO.pg.commit()
def Get(self, query):
    locate_file = ReadFile()
    record = locate_file.readFile("data.txt")
    if '|' in query:
        # Query of the form "year|format": match on the combined year/format index.
        info = query.split('|')
        year_index = int(info[0]) % 10
        movie_type = self.type.index(info[1])
        index = str(info[0]) + '0' + str(movie_type)
        for each in self.hashTable[year_index]:
            if each[:6] == index:
                location = int(each[6:])
                print ", ".join(record[location])
            elif each == '-1':
                return "The record is not in the database"
    else:
        # Bare year: print every record from that year.
        year_index = int(query) % 10
        for each in self.hashTable[year_index]:
            if each == '-1':
                # No more records for this year.
                break
            else:
                if each[:4] == query:
                    location = int(each[6:])
                    print ", ".join(record[location])
def parse(self, path, counter):
    cestaDAO = CestaDAO.CestaDAO()
    read = ReadFile.ReadFile()
    lines = read.readCsvFile(path)
    line = lines.readline()
    while line:
        line = re.sub(' +', ' ', line)
        line = line.split(' ')
        if "[S]" in line:
            query = ("INSERT INTO cesta(qtd_pecas, valor, codigo, loja)VALUES("
                     + line[len(line) - 3] + ",'" + line[len(line) - 2] + "',"
                     + line[2] + "," + str(counter) + ");")
            cestaDAO.pg.executeQuery(query)
        elif "[DS]" in line:
            query = ("INSERT INTO cesta(qtd_pecas, valor, codigo, loja)VALUES("
                     + line[len(line) - 4] + ",'" + line[len(line) - 3] + "',"
                     + line[2] + "," + str(counter) + ");")
            cestaDAO.pg.executeQuery(query)
        line = lines.readline()
    lines.close()
    cestaDAO.pg.commit()
    cestaDAO.pg.disconnect()
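# Note: the INSERT above is assembled by string concatenation, which breaks on
# quoted values and invites SQL injection. If the project's pg wrapper supports
# parameter binding (hypothetical signature, shown for illustration only), the
# safer form would be:
# cestaDAO.pg.executeQuery(
#     "INSERT INTO cesta(qtd_pecas, valor, codigo, loja) VALUES(%s, %s, %s, %s);",
#     (line[-3], line[-2], line[2], counter))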
__author__ = 'jarfy'

from ReadFile import *
from hashmap import *

test = ReadFile()
data = test.readFile("data.txt")
formate = test.getType()

operator = MovieIndex(formate)
operator.MovieHash(data)
print "show movie index"
print operator.hashTable
print

print "put 46th record into index"
operator.Put(
    46,
    "The Abyss,1970,LaserDisc,Science Fiction,James Cameron,James Cameron,USA,20th Century Fox,$0.00"
)
print operator.hashTable
print

print "Find all movies made in 2000"
operator.Get("2000")
print

print "Find all movies made in 2005"
operator.Get("2005")
print

print "Find all movies made in 2010"
operator.Get("2010")
print

print "Find all DVD movies made in 1977"
operator.Get("1977|DVD")
print
import ReadFile
import preprocess

read = ReadFile.ReadFile(
    "D:\\PycharmProjects\\DBN-SVM\\NSL_KDD-master").get_data()
data_pp = preprocess.Preprocess(read).do_predict_preprocess()
print(data_pp)
from WriteFiles import *
from ReadFile import *
from ProjectionHandler import *
import pandas as pd

writer = WriteData(2018, "stats_file.txt")
reader = ReadFile("stats_file.txt")

writer.write_data_to_text(3)
players3 = reader.generate_players()
writer.write_clean_data_to_csv(players3, "threeyears.csv")

writer.write_data_to_text(2)
players2 = reader.generate_players()
writer.write_clean_data_to_csv(players2, "twoyears.csv")

writer.write_data_to_text(1)
players1 = reader.generate_players()
writer.write_clean_data_to_csv(players1, "oneyear.csv")

writer.write_data_to_text(0)
players0 = reader.generate_players()
writer.write_clean_data_to_csv(players0, "currentyear.csv")

handler = ProjectionHandler("currentyear.csv", players0, "oneyear.csv", players1,
                            "twoyears.csv", players2, "threeyears.csv", players3)
handler.create_dicts()
import sys
import AStar as algorithm
import ReadFile as input

if __name__ == '__main__':
    N, S, G, A = input.ReadFile(sys.argv[1])
    # map = algorithm.MapWithWeight(N, N)
    path = {}
    cost = {}
    obstacles = []
    result, obstacles = algorithm.process(N, A, path, cost, S, G)
    # print(obstacles)
    algorithm.write_file(sys.argv[2], N, S, G, result, obstacles)
__author__ = 'jarfy'

import ReadFile
from HashTable import *

# Test Case One
print('Test Case One --- Set Year and Format as Index.')
test = ReadFile.ReadFile()
test.readFile("data.txt")
test.setattributes('Year', 'Format')
operator = HashTable()
print()

print("put records into index")
for idx in range(len(test.record)):
    operator.Put(str(idx), test.record[idx])
print()

print("Find all DVD movies made in 1997")
temp = operator.Get("1997DVD")
for record in temp:
    print(record)
print()

print("Find all VHS movies made in 1990")
temp = operator.Get("1990VHS")
for record in temp:
    print(record)
print()

print("Find all DVD movies made in 2001")
temp = operator.Get("2001DVD")
for record in temp:
    print(record)
print()