Code Example #1
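# NOTE (assumed): this excerpt depends on optparse plus the project-local
# modules below, which are not shown in the original snippet.
from optparse import OptionParser
import utils
import ReadFile
import preprocess
import dbn_model
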
def main():
    root = utils.get_root_path(False)
    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    parser.add_option('--learning_rate_rbm',
                      action='store',
                      type='string',
                      dest='learning_rate_rbm')
    parser.add_option('--epochs_rbm',
                      action='store',
                      type='string',
                      dest='epochs_rbm')
    parser.add_option('--batch_size',
                      action='store',
                      type='string',
                      dest='batch_size')
    parser.add_option('--data_set',
                      action='store',
                      type='string',
                      dest='data_set')

    (opts, args) = parser.parse_args()

    file_data = ReadFile.ReadFile(root + '/NSL_KDD-master',
                                  opts=opts).get_data()
    data_pp = preprocess.Preprocess(file_data).do_predict_preprocess()
    dbn_model.DBN(data_pp).do_dbn('yadlt', opts=opts)
    dbn_model.DBN(data_pp).do_dbn_with_weight_matrix(root + '/save')
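    # NOTE (assumed): `model` is another project-local module exposing the
    # SVM step; it is not defined anywhere in this excerpt.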
    model.do_svm()
Code Example #2
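 # NOTE (assumed): this method excerpt relies on module-level imports of
 # `json` and `time`, and on ReadFile from the same project.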
 def generate_bulk_insert_data(self, file_folder, num):
     read_file = ReadFile.ReadFile(file_folder)
     # get_data() returns lists of JSON-like strings; turn them into bulk
     # payloads by prepending an action line to every document. Some fields
     # are duplicated for redundancy, and the index location is given in the
     # request URL, which keeps each action line small.
     action = {"index": {}}
     bulk_data = ""
     for data_list in read_file.get_data(num):
         # each `data` is a JSON string taken from the source file; to add
         # the redundant fields, load it into a dict, extend it, then dump
         # it back to JSON
         for data in data_list:
             try:
                 data_map = json.loads(data)
                 data_map['title1'] = data_map['title']
                 data_map['tag'] = {"input": data_map['title']}
                 data_map['text1'] = data_map['text']
                 # derive a numeric timestamp from the record's time field
                 data_map['timestamp'] = time.mktime(
                     time.strptime(data_map['time'], '%Y-%m-%d %H:%M:%S'))
                 bulk_data += json.dumps(
                     action, ensure_ascii=False) + '\n' + json.dumps(
                         data_map, ensure_ascii=False) + '\n'
             except (ValueError, KeyError):
                 # skip records that are not valid JSON or lack a field
                 continue
         bulk_data += '\n'
         yield (bulk_data)
         bulk_data = ""
Code Example #3
 def Remove(self, key):
     locate_file = ReadFile()
     # NOTE: the file contents read here are not used by this method
     record = locate_file.readFile("data.txt")
     for year in self.hashTable:
         # iterate over a copy: removing items from a list while iterating
         # over it directly skips elements
         for each in list(year):
             if each[-2:] == key:
                 year.remove(each)
                 year.append('-1')
Code Example #4
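    # NOTE (assumed): this excerpt relies on module-level imports of
    # `logging` and `os`, plus the project's ReadFile, WriteOutput and
    # Metric classes.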
    def main(self):
        # Initialize Log File Format
        logging.basicConfig(handlers=[logging.FileHandler('LogFile.log', 'w', 'utf-8')], level=logging.INFO,
                            format='%(asctime)s - %(levelname)s - %(message)s')
        logging.info('Started')

        # Create Read File
        read = ReadFile()
        # Empty lists and paths to read files in working directory
        Polls = []
        BYSLIST = ""
        tempList = os.listdir(os.getcwd())

        # For each file: if it ends with .csv and is an answer key, call readAnswerKey
        for files in tempList:
            if files.endswith(".csv"):
                if "AnswerKey" in files:
                    read.readAnswerKey(files)
                else:
                    # Otherwise it is a poll report, so add it to Polls
                    Polls.append(files)
            # If the file ends with .xls, it is the student list
            elif files.endswith(".xls"):
                BYSLIST = files

        # Read the student list by calling readStudentFile
        students = read.readStudentFile(BYSLIST)

        # Initialize which poll is being used
        usedPolls = []
        for report in Polls:
            # Read report files and add them to used polls
            read.readReportFile(report, usedPolls)

        # For each student match their answers with correct answers
        for student in students:
            student.setCorrectAnswer()
            student.toString()
            
        # Create write Output object
        write = WriteOutput(students, usedPolls)
        # Write each student's result for each poll
        write.writeGlobalResult()

        # Create Metric
        metric = Metric(read)
        # Write results to the Global File
        metric.calculateMetrics(usedPolls)

        # Write per-question results, plus pie charts and histograms, to the Excel file
        metric.calculateQuestionDistrubition(usedPolls)

        # Print attendance
        write.writeAttandence(metric)
Code Example #5
File: main.py Project: omrikiantman/ir_bgu
def run_index():
    # run an entire index build
    global docs_path
    global postings_path
    global is_stemming
    global indexer
    global dict_cache_path
    try:
        # check validation conditions
        if (not check_corpus_directory(docs_path.get())) or (not check_postings_directory(postings_path.get())):
            return
        result = tkMessageBox.askquestion("Run Index",
                                          "Are you sure?\nDon't worry if the GUI"
                                          " is stuck or not responding - it is working", icon='warning')
        if result != 'yes':
            return
        print ('START TIME - ' + time.strftime("%H:%M:%S"))
        start_time = datetime.now()
        # reset the current memory of the project
        if (globs.main_dictionary is not None) and (bool(globs.main_dictionary)):
            globs.main_dictionary.clear()
        if (globs.cache is not None) and (bool(globs.cache)):
            globs.cache.clear()
        if (globs.documents_dict is not None) and (bool(globs.documents_dict)):
            globs.documents_dict.clear()
        # start indexing
        globs.stop_words = load_stop_words(docs_path.get())
        indexer = Indexer.Indexer(postings_path.get(), is_stemming.get())
        read_file = ReadFile.ReadFile(get_corpus_dir(docs_path.get()),
                                      indexer, globs.constants, globs.stop_words, is_stemming.get())
        read_file.index_folder()
        globs.num_of_documents = len(read_file.documents_dict)

        globs.documents_dict = read_file.documents_dict
        del read_file
        indexer.unite_temp_postings()
        globs.main_dictionary = indexer.main_dict
        indexer.build_document_weight(globs.documents_dict)
        # in case want to print stats, uncomment this
        # with open('{}{}'.format('stats', 'stem' if is_stemming.get() else ''),'w') as my_stats_file:
        #    my_stats_file.write('term,tf,df\n'.format())
        #    for key,val in main_dictionary.iteritems():
        #        my_stats_file.write('{},{},{}\n'.format(key,val.tf,val.df))
        globs.cache = indexer.cache_dict
        globs.average_doc_size = globs.average_doc_size/globs.num_of_documents
        dict_cache_path = postings_path
        print ('END TIME - ' + time.strftime("%H:%M:%S"))
        end_time = datetime.now()
        print_stats_at_end_of_indexing(end_time - start_time)
    except Exception as err:
        tkMessageBox.showinfo('ERROR', err)
        traceback.print_exc(file=stdout)
Code Example #6
File: HashTable.py Project: bomboom/542_Jiaqi_Chen
    def Get(self, data_value):
        assert data_value is not None, "Data value must be valid."
        locate_file = ReadFile.ReadFile()
        locate_file.readFile('data.txt')
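        # NOTE (assumed): the bucket index is the low 3 bits of the BKDR
        # hash, i.e. the table is expected to have 2**3 = 8 slots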
        hash_value = int(bin(self.__BKDRHash(str(data_value)[2:]))[-3:],
                         base=2)
        candidates = self.table[hash_value]
        results = []
        for candidate in candidates:
            if data_value == candidate[0]:
                results.append(candidate[1])

        return [' '.join(locate_file.content[int(i)])[:-1] for i in results]
Code Example #7
File: CsvParser.py Project: gustavodsf/py_projects
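 # Extracts the item block (between the "Código" header and the "Destino"
 # marker) from a CSV report and returns it as one string for NLTK.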
 def parserForNltk(self, path):
     words = ""
     read = ReadFile.ReadFile()
     lines = read.readCsvFile(path)
     line = lines.readline()
     while line:
         if "Código" in line:
             line = lines.readline()
             while "Destino" not in line:
                 line = line.replace("\n", "")
                 words = words + " " + line
                 line = lines.readline()
         line = lines.readline()
     lines.close()
     return words
Code Example #8
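# NOTE (assumed): getReadStart, getFileIndex, getoldfile, convert2filename,
# convert2newfile and ReadFile are project-local helpers not shown here,
# and `sleep` is time.sleep.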
def translatefile(filename):
    inputfile = filename
    iReadStart = getReadStart(filename)
    iFileIndex = getFileIndex(filename)
        
    readObj = ReadFile(inputfile)
    iFileSize = readObj.filesize()
    (iPos, data) = readObj.readvalue(iReadStart, 12)
    while len(data) > 0:
        oldFile = getoldfile()
        if iReadStart == 0:
            newName = convert2filename(iFileSize, 0, filename)
        else:
            newName = convert2filename(iPos, iFileIndex, data)
        while len(oldFile) == 0:
            # NOTE: time.sleep() takes seconds, so sleep(1000) waits roughly
            # 17 minutes; sleep(1) is more likely what was intended
            sleep(1000)
            oldFile = getoldfile()
        convert2newfile(oldFile, newName)
        # advance to the next 12-byte chunk (assuming readvalue returns the
        # next offset); without this re-read the loop would process the same
        # data forever
        (iPos, data) = readObj.readvalue(iPos, 12)
Code Example #9
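	# NOTE (assumed): the *DAO attributes (cestaDAO, dateDAO, lojaDAO, ...)
	# are initialized elsewhere in this class, and `re` is imported at
	# module level.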
	def parseItem(self,path,counter):
		self.cestaDAO.retrieveAll(counter)
		self.dateDAO.retrieveAll()
		lojaId = self.lojaDAO.lojaDict[counter]
		read = ReadFile.ReadFile()
		lines = read.readCsvFile(path)
		line = lines.readline()
		while line:	
			if "[S]" in line or "[DS]" in line:
				operacaoId = self.operacaoDAO.wichOperacao(line)
				line = re.sub(' +',' ',line)
				line = line.split(' ')
				cestaId = self.cestaDAO.cestaDict[int(line[2])]
				date = line[0].replace("\"","")
				dateId = self.dateDAO.dateDict[date]
			
			if "Código" in line:
				line = lines.readline()
				while "Destino" not in line:
					colecaoId = self.colecaoDAO.wichColecao(line)
					corId = self.corDAO.wichCor(line)
					estadoId = self.estadoDAO.wichEstado(line)
					faixaEtariaId = self.faixaEtariaDAO.wichFaixaEtaria(line)
					generoId = self.generoDAO.wichGenero(line)
					materialId = self.materialDAO.wichMaterial(line)
					modeloId = self.modeloDAO.wichModelo(line)
					tamanhoId = self.tamanhoDAO.wichTamanho(line)
					itemId = self.itemDAO.wichItem(line)
					tipoId = self.tipoDAO.wichTipo(self.itemDAO.wichTipo(itemId))
					line = re.sub(' +',' ',line)
					line = line.split(' ')
					valor = line[-3]
					qtd = line[-4]

					if itemId != 0:
						self.compraDAO.saveCompra(
							lojaId, cestaId, dateId, operacaoId, colecaoId,
							corId, estadoId, faixaEtariaId, generoId,
							materialId, modeloId, tamanhoId, itemId, tipoId,
							valor, qtd)
					line = lines.readline()

			line = lines.readline()
		lines.close()
		self.compraDAO.pg.commit()
Code Example #10
 def Get(self, query):
     locate_file = ReadFile()
     record = locate_file.readFile("data.txt")
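     # NOTE (assumed): queries are either "YYYY" or "YYYY|Format"; buckets
     # are keyed by year % 10, and each entry is a 6-character prefix
     # (year + '0' + format index) followed by the record's line offset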
     if '|' in query:
         info = query.split('|')
         year_index = int(info[0]) % 10
         movie_type = self.type.index(info[1])
         index = str(info[0]) + '0' + str(movie_type)
         for each in self.hashTable[year_index]:
             if each[:6] == index:
                 location = int(each[6:])
                 print ", ".join(record[location])
             elif each == '-1':
                 return "The record is not in the database"
     else:
         year_index = int(query) % 10
         for each in self.hashTable[year_index]:
             if each == '-1':
                 # print "no more record in query"
                 break
             else:
                 if each[:4] == query:
                     location = int(each[6:])
                     print ", ".join(record[location])
Code Example #11
File: CestaParser.py Project: gustavodsf/py_projects
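 # Parses a store's CSV export and inserts one row into the `cesta` table
 # for each basket line (marked "[S]" or "[DS]").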
 def parse(self, path, counter):
     cestaDAO = CestaDAO.CestaDAO()
     read = ReadFile.ReadFile()
     lines = read.readCsvFile(path)
     line = lines.readline()
     while line:
         line = re.sub(' +', ' ', line)
         line = line.split(' ')
         if "[S]" in line:
             query = "INSERT INTO cesta(qtd_pecas, valor, codigo, loja)VALUES(" + line[
                 len(line) - 3] + ",\'" + line[
                     len(line) -
                     2] + "\'," + line[2] + "," + str(counter) + ");"
             cestaDAO.pg.executeQuery(query)
         elif "[DS]" in line:
             query = "INSERT INTO cesta(qtd_pecas, valor, codigo, loja)VALUES(" + line[
                 len(line) - 4] + ",\'" + line[
                     len(line) -
                     3] + "\'," + line[2] + "," + str(counter) + ");"
             cestaDAO.pg.executeQuery(query)
         line = lines.readline()
     lines.close()
     cestaDAO.pg.commit()
     cestaDAO.pg.disconnect()
Code Example #12
File: user.py Project: bomboom/542_Jiaqi_Chen
__author__ = 'jarfy'
from ReadFile import *
from hashmap import *

test = ReadFile()
data = test.readFile("data.txt")
formate = test.getType()
operator = MovieIndex(formate)
operator.MovieHash(data)
print "show movie index"
print operator.hashTable
print
print "put 46th record into index"
operator.Put(
    46,
    "The Abyss,1970,LaserDisc,Science Fiction,James Cameron,James Cameron,USA,20th Century Fox,$0.00"
)
print operator.hashTable
print
print "Find all movies made in 2000"
operator.Get("2000")
print
print "Find all movies made in 2005"
operator.Get("2005")
print
print "Find all movies made in 2010"
operator.Get("2010")
print
print "Find all DVD movies made in 1977"
operator.Get("1977|DVD")
print
Code Example #13
import ReadFile
import preprocess
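
# NOTE (assumed): ReadFile and preprocess are modules from the DBN-SVM
# project; the path below points at a local copy of the NSL-KDD dataset.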

read = ReadFile.ReadFile(
    "D:\\PycharmProjects\\DBN-SVM\\NSL_KDD-master").get_data()
data_pp = preprocess.Preprocess(read).do_predict_preprocess()

print(data_pp)
Code Example #14
from WriteFiles import *
from ReadFile import *
from ProjectionHandler import *

import pandas as pd
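
# Dump raw stats for several look-back windows (3, 2, 1 and 0 years),
# parse each dump into player objects, and write a cleaned CSV per window.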

writer = WriteData(2018, "stats_file.txt")
reader = ReadFile("stats_file.txt")

writer.write_data_to_text(3)
players3 = reader.generate_players()
writer.write_clean_data_to_csv(players3, "threeyears.csv")

writer.write_data_to_text(2)
players2 = reader.generate_players()
writer.write_clean_data_to_csv(players2, "twoyears.csv")

writer.write_data_to_text(1)
players1 = reader.generate_players()
writer.write_clean_data_to_csv(players1, "oneyear.csv")

writer.write_data_to_text(0)
players0 = reader.generate_players()
writer.write_clean_data_to_csv(players0, "currentyear.csv")

handler = ProjectionHandler("currentyear.csv", players0, "oneyear.csv",
                            players1, "twoyears.csv", players2,
                            "threeyears.csv", players3)
handler.create_dicts()
Code Example #15
import sys
import AStar as algorithm
import ReadFile as reader  # aliasing as "input" would shadow the built-in input()
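# NOTE (assumed): ReadFile.ReadFile(path) parses the input file and returns
# the grid size N, the start S, the goal G, and obstacle data A.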

if __name__ == '__main__':
	N, S, G, A = reader.ReadFile(sys.argv[1])
	#map = algorithm.MapWithWeight(N,N)
	path = {}
	cost = {}
	obstacles = []
	result, obstacles = algorithm.process(N, A, path, cost, S, G)
	#print(obstacles)
	algorithm.write_file(sys.argv[2], N, S, G, result, obstacles)
Code Example #16
File: user.py Project: bomboom/542_Jiaqi_Chen
__author__ = 'jarfy'
import ReadFile
from HashTable import *


#  Test Case One
print('Test Case One --- Set Year and Format as Index.')
test = ReadFile.ReadFile()
test.readFile("data.txt")
test.setattributes('Year', 'Format')
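# NOTE (assumed): keys are built by concatenating the chosen attributes,
# so a 1997 DVD record is looked up with Get("1997DVD").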
operator = HashTable()
print()
print("put records into index")
for idx in range(len(test.record)):
    operator.Put(str(idx), test.record[idx])
print()
print("Find all DVD movies made in 1997")
temp = operator.Get("1997DVD")
for record in temp:
    print(record)
print()
print("Find all VHS movies made in 1990")
temp = operator.Get("1990VHS")
for record in temp:
    print(record)
print()
print("Find all DVD movies made in 2001")
temp = operator.Get("2001DVD")
for record in temp:
    print(record)
print()