def GenStat(self, fn):
    with Cons.MT(fn, print_time=False):
        lap_times = []
        with open(fn) as fo:
            for line in fo.readlines():
                line = line.rstrip()
                if len(line) == 0:
                    continue
                if line.startswith("#"):
                    continue
                # 4 KiB from /mnt/local-ssd0/ioping-test-data (ext4 /dev/xvdb): request=1 time=219.1 us
                # 4 KiB from /mnt/local-ssd0/ioping-test-data (ext4 /dev/xvdb): request=394 time=1.51 ms
                # re.search, not re.match: the lap time appears at the end of the line.
                m = re.search(r"(?P<lap_time>(\d|\.)+ (us|ms))", line)
                if m:
                    lt = m.group("lap_time")
                    if lt.endswith(" us"):
                        lt = float(lt[:-3])
                    elif lt.endswith(" ms"):
                        lt = float(lt[:-3]) * 1000  # store lap times in us
                    lap_times.append(lt)
                    continue
                raise RuntimeError("Unexpected [%s]" % line)
        #Cons.P(len(lap_times))
        fn_cdf = "%s/%s-cdf" % (_dn_output, os.path.basename(fn))
        self.fns_cdf.append(fn_cdf)
        Stat.GenStat(lap_times, fn_cdf)
def GetNumAccessesStat():
    fn_out = "%s/cdf-youtube-accesses-per-co" % Conf.DnOut()
    if os.path.exists(fn_out):
        return fn_out
    num_accesses = []
    fn_in = Conf.GetFn("video_accesses_by_COs")
    with open(fn_in) as fo:
        while True:
            line = fo.readline()
            if len(line) == 0:
                break
            line = line.strip()
            if len(line) == 0:
                continue
            if line[0] == "#":
                continue
            # 4 34.3305 -111.091 13
            t = line.split(" ")
            if len(t) != 4:
                raise RuntimeError("Unexpected: [%s]" % line)
            n = int(t[3])
            #Cons.P(n)
            num_accesses.append(n)
            # Skip the n per-access lines that follow the header line
            for j in range(n):
                if len(fo.readline()) == 0:
                    raise RuntimeError("Unexpected")
    r = Stat.Gen(num_accesses, fn_out)
    #Cons.P(r)
    return fn_out
def add_book_to_database(path):
    filepath = os.path.abspath(os.path.join(root, path))
    print("Adding file " + path)
    # Check the file extension
    if filepath.endswith(".pdf"):
        # Read the PDF file
        pdf = parser.PdfReader(filepath)
        # Retrieve the document metadata
        author = pdf.getAuthor()
        title = pdf.getTitle()
        if is_valid(author, title) and not db.book_is_in_database(title, author):
            # Extract the text; skip the book if extraction fails
            try:
                text = pdf.extractText()
            except Exception:
                pass
            else:
                # Compute the TF of each word
                occurences = text.getOccurences()
                tfs = st.tf(text.getNumberOfWords(), occurences)
                # Add the book to the database
                db.add_book_to_database(title, author, tfs)
                # Save the changes
                db.save_database()
    # Show the current number of books in the database
    print("Number of books in the database: " + str(db.number_books()))
def _GetMemStatByHourFromDstat(fn_ycsb):
    fn_dstat = _GenDstat(fn_ycsb)
    col_time = 21
    col_mem_buff = 13
    #col_mem_cache = 14
    #Cons.P(fn_dstat)

    # Bucketize memory usage
    # {hour: [mem_usage]}
    hour_memusage = {}
    with open(fn_dstat) as fo:
        for line in fo:
            if line.startswith("#"):
                continue
            line = line.strip()
            t = re.split(r" +", line)
            time0 = t[col_time - 1]
            mem_buff = int(t[col_mem_buff - 1])
            #Cons.P("%s %d" % (time0, mem_buff))
            hour = int(time0.split(":")[0])
            if hour not in hour_memusage:
                hour_memusage[hour] = []
            hour_memusage[hour].append(mem_buff)

    hour_memstat = {}
    for hour, mem_usage in hour_memusage.items():
        r = Stat.Gen(mem_usage)
        #Cons.P("%d %s" % (hour, r))
        hour_memstat[hour] = r
    return hour_memstat
def __init__(self, standard2LetterName):
    global instantiatedCount
    Base.__init__(self, standard2LetterName)
    self.jogos = []
    self.i = 0  # this is the nDoConc minus 1 pointer
    self.getJogosFromDB()
    self.histG = Stat.makeHistogram(self.jogos)
    self.initializeHistGOfHistG()
def GenStat(self, fn):
    with Cons.MT(fn, print_time=False):
        lap_times = []
        fn0 = "%s/result/%s" % (os.path.dirname(__file__), fn)
        with open(fn0) as fo:
            for line in fo.readlines():
                line = line.rstrip()
                if len(line) == 0:
                    continue
                # 4 KiB from /mnt/local-ssd0/ioping-test-data (ext4 /dev/xvdb): request=1 time=219.1 us
                # 4 KiB from /mnt/local-ssd0/ioping-test-data (ext4 /dev/xvdb): request=394 time=1.51 ms
                m = re.match(r"4 KiB from /mnt/.+/ioping-test-data \(ext4 /dev/xvd.\):"
                             r" request=\d+ time=(?P<lap_time>(\d|\.)+ (us|ms))", line)
                if m:
                    lt = m.group("lap_time")
                    if lt.endswith(" us"):
                        lt = float(lt[:-3])
                    elif lt.endswith(" ms"):
                        lt = float(lt[:-3]) * 1000  # store lap times in us
                    lap_times.append(lt)
                    continue
                # --- /mnt/local-ssd0/ioping-test-data (ext4 /dev/xvdb) ioping statistics ---
                if re.match(r"--- /mnt/.+/ioping-test-data \(ext4 /dev/xvd.\) ioping statistics ---", line):
                    continue
                # 1 k requests completed in 175.1 ms, 3.91 MiB read, 5.71 k iops, 22.3 MiB/s
                # 1 k requests completed in 6.06 s, 3.91 MiB read, 164 iops, 659.8 KiB/s
                if re.match(r"\d+ k requests completed in .+ (min|s|ms), .+ MiB read, .+ iops, .+ (K|M)iB/s", line):
                    continue
                # min/avg/max/mdev = 146.9 us / 175.1 us / 1.77 ms / 79.6 us
                if re.match(r"min/avg/max/mdev = .+ (u|m)s / .+ (u|m)s / .+ (u|m)s / .+ (u|m)s", line):
                    continue
                raise RuntimeError("Unexpected [%s]" % line)
        #Cons.P(len(lap_times))
        Stat.GenStat(lap_times, "%s/%s-cdf" % (_dn_stat, fn))

        # Latencies in time order
        fn_time_order = "%s/%s-time-order" % (_dn_stat, fn)
        with open(fn_time_order, "w") as fo:
            for t in lap_times:
                fo.write("%s\n" % t)
        Cons.P("Created %s %d" % (fn_time_order, os.path.getsize(fn_time_order)))
def recreate():
    print('recreate() sql tables')
    for whichDB in fSql.DBCONSTANTS:
        #if whichDB == 2:
        #    continue
        print('recreate() sql tables for db=', whichDB)
        dbObj = fSql.getDBObj(whichDB)
        if not dbObj:
            continue
        if dbObj.whichDB != whichDB:
            # This may happen because getDBObj() returns a Sqlite DB object when MySQL is not
            # available (either the MySQLdb module is missing or, say, the server is offline).
            continue
        dbObj.openConnection()
        conn = dbObj.conn
        createTablesWithConn(conn, whichDB)
        conn.close()
        del dbObj
    for jogoTipo in ['lf', 'ms']:
        hu.doHistoricoUpdater(jogoTipo)
        Stat.processDBStats(jogoTipo)
def __init__(self, identifier, args):
    self.identifier = identifier
    self.descriptors = Descriptor.descriptors()
    self.attributes = Attribute.attributes()
    self.stats = Stat.stats()
    for a in args:
        # Each element has the form "category:command=value"
        as1 = a.split(':')
        category = as1[0]
        as2 = as1[1].split('=')
        command = as2[0]
        value = as2[1]  # renamed from "args" to avoid shadowing the parameter
        self.apply(category, command, value)
def _GetCpuStatByHour(fn_ycsb):
    fn_dstat = _GenDstat(fn_ycsb)
    col_time = 17
    col_cpu_idle = 19
    col_cpu_sys = col_cpu_idle + 2
    col_cpu_user = col_cpu_idle + 3
    col_cpu_iowait = col_cpu_idle + 4

    # With SSTable organization computation, there is less CPU usage and a bit more iowait time.
    # Puzzling. Can't explain why the CPU usage is lower when SSTable organization computation is on.
    # The slightly increased iowait time towards the end might come from the increased amount of logs
    # and the overhead of zipping and uploading them.
    which_cpu = "overall"
    #which_cpu = "user"
    #which_cpu = "user+kernel"
    #which_cpu = "iowait"

    #Cons.P(fn_dstat)

    # Bucketize CPU usage
    # {hour: [cpu_usage]}
    hour_cpuusage = {}
    with open(fn_dstat) as fo:
        for line in fo:
            if line.startswith("#"):
                continue
            line = line.strip()
            t = re.split(r" +", line)
            time0 = t[col_time - 1]
            if which_cpu == "overall":
                cpu = 100.0 - float(t[col_cpu_idle - 1])
            elif which_cpu == "user":
                cpu = float(t[col_cpu_user - 1])
            elif which_cpu == "user+kernel":
                cpu = float(t[col_cpu_user - 1]) + float(t[col_cpu_sys - 1])
            elif which_cpu == "iowait":
                cpu = float(t[col_cpu_iowait - 1])
            else:
                raise RuntimeError("Unexpected")
            #Cons.P("%s %s" % (time0, cpu))
            hour = int(time0.split(":")[0])
            if hour not in hour_cpuusage:
                hour_cpuusage[hour] = []
            hour_cpuusage[hour].append(cpu)

    hour_cpustat = {}
    for hour, cpu_usage in hour_cpuusage.items():
        r = Stat.Gen(cpu_usage)
        #Cons.P("%d %s" % (hour, r))
        hour_cpustat[hour] = r
    return hour_cpustat
def scatter_between_class(ds, class_mean, grand_mean):
    classes = Stat.unique(mh.getColumn(ds, len(ds[0]) - 1))
    sb = np.zeros((len(ds[0]) - 1, len(ds[0]) - 1))
    for i in range(len(class_mean)):
        transposta = mh.transposeMatrix(ds.copy())
        transposta = np.array(transposta)
        # Rows of ds belonging to class i (the class label is the last column)
        normal = ds[transposta[len(ds[0]) - 1] == classes[i]]
        meanc = np.asarray(class_mean[i]).reshape(4, 1)
        meang = np.asarray(grand_mean).reshape(4, 1)
        sb += len(normal) * (meanc - meang).dot((meanc - meang).T)
    return sb
def grand_mean(ds, column=-1):
    if column > -1:
        # Per-class means: one mean vector per class (the class label is the last column)
        classes = Stat.unique(mh.getColumn(ds, len(ds[0]) - 1))
        mean = []
        for c in classes:
            transposta = mh.transposeMatrix(ds.copy())
            transposta = np.array(transposta)
            normal = ds[transposta[len(ds[0]) - 1] == c]
            mean.append([Stat.mean(mh.getColumn(normal, i))
                         for i in range(0, len(normal[0]) - 1)])
        return mean
    else:
        # Grand mean over the whole dataset
        return [Stat.mean(mh.getColumn(ds, i)) for i in range(0, len(ds[0]) - 1)]
def mat_similarities(tfidfs):
    """Build the similarity matrix."""
    # Similarity matrix
    mat_sim = {}
    # List of the book ids in the database (list() so it can be sliced below)
    id_books = list(tfidfs.keys())
    # One dictionary per book
    for id_book in id_books:
        mat_sim[id_book] = {}
    # Cosine similarities; the matrix is symmetric, so each pair is computed once
    for id_book_i in id_books:
        tfidf_book_i = np.array(tfidfs[id_book_i], dtype=float)
        for id_book_j in id_books[id_books.index(id_book_i) + 1:]:
            tfidf_book_j = np.array(tfidfs[id_book_j], dtype=float)
            cos = st.similarity(tfidf_book_i, tfidf_book_j)
            mat_sim[id_book_i][id_book_j] = cos
            mat_sim[id_book_j][id_book_i] = cos
    return mat_sim
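# st.similarity above is presumably the cosine similarity of two TF-IDF vectors.
# A minimal numpy sketch of that computation (a hypothetical stand-in, not the
# original st module), shown only to make the matrix construction self-explanatory:
import numpy as np

def cosine_similarity(u, v):
    # cos(u, v) = u.v / (|u| |v|); defined as 0.0 when either vector is all zeros
    norm = np.linalg.norm(u) * np.linalg.norm(v)
    return float(np.dot(u, v) / norm) if norm > 0 else 0.0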
def scatter_within_class(ds, class_mean):
    classes = Stat.unique(mh.getColumn(ds, len(ds[0]) - 1))
    sw = np.zeros((len(ds[0]) - 1, len(ds[0]) - 1))
    for i in range(len(classes)):
        si = np.zeros((len(ds[0]) - 1, len(ds[0]) - 1))
        transposta = mh.transposeMatrix(ds.copy())
        transposta = np.array(transposta)
        normal = ds[transposta[len(ds[0]) - 1] == classes[i]]
        for j in range(len(normal)):
            row = normal[j]
            row = row[0:-1].reshape(4, 1)
            mean = np.asarray(class_mean[i]).reshape(4, 1)
            si += (row - mean).dot((row - mean).T)
        sw += si
    return sw
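# A minimal sketch (not from the original sources) of how grand_mean,
# scatter_within_class, and scatter_between_class could combine into a Fisher
# LDA projection via the standard eig(inv(Sw) . Sb) formulation. It assumes ds
# is a numpy array whose last column holds the class label, as the functions
# above do; the function name and n_components parameter are illustrative.
import numpy as np

def lda_projection(ds, n_components=2):
    class_means = grand_mean(ds, column=0)  # per-class mean vectors
    overall_mean = grand_mean(ds)           # mean vector over all rows
    sw = scatter_within_class(ds, class_means)
    sb = scatter_between_class(ds, class_means, overall_mean)
    # Eigenvectors of inv(Sw) . Sb, ordered by decreasing eigenvalue
    eigvals, eigvecs = np.linalg.eig(np.linalg.inv(sw).dot(sb))
    order = np.argsort(eigvals.real)[::-1]
    w = eigvecs[:, order[:n_components]].real  # projection matrix
    return ds[:, :-1].astype(float).dot(w)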
def tfidfs():
    """Compute the TF-IDF of every book in the database."""
    # List of the id_books in the database
    id_books = db.get_id_books()
    # Number of books in the database
    nb_books = len(id_books)
    # For each word, the number of books in which it appears
    occ_in_books = db.dic_idword_nbbooks()
    # Dictionary of the IDFs of the database
    dic_idf = st.dic_idf(nb_books, occ_in_books)
    # Dictionary mapping each id_book to its TF-IDF vector
    tfidfs = {}
    for id_book in id_books:
        dic_tf = db.dic_tf_book(id_book)
        tfidf = []
        for idword in dic_idf:
            tfidf.append(float(dic_tf.get(idword, 0)) * float(dic_idf[idword]))
        tfidfs[id_book] = tfidf
        # print("TF-IDF of book " + str(id_book) + " computed")
    return tfidfs
def GenStat(self, fn):
    with Cons.MT(fn, print_time=False):
        thrp = []
        with open(fn) as fo:
            for line in fo.readlines():
                line = line.rstrip()
                if len(line) == 0:
                    continue
                if line.startswith("#"):
                    continue
                # 0.348919 s, 192 MB/s
                m = re.match(r"(?P<lap_time>(\d|\.)+) s, .+", line)
                if m:
                    # Each run transfers 64 MiB, so elapsed seconds -> MiB/s
                    # (consistent with the sample: 64 MiB = 67.1 MB, 67.1 / 0.3489 = 192 MB/s)
                    thrp.append(64.0 / float(m.group("lap_time")))
                    continue
                raise RuntimeError("Unexpected %s" % line)
        #Cons.P(len(thrp))
        fn_cdf = "%s/%s-cdf" % (_dn_output, os.path.basename(fn))
        self.fns_cdf.append(fn_cdf)
        Stat.GenStat(thrp, fn_cdf)
def parse(self, filename):
    players = []
    # Text mode with newline="" is what csv.reader expects in Python 3
    with open(filename, "r", newline="") as csvfile:
        reader = csv.reader(csvfile, delimiter=",")
        for row in reader:
            players.append([ele for ele in row if ele])
    self.header = players[0]
    for player in players:
        if player[0] == "Player":
            continue  # skip the header row
        stats = []
        i = 0
        # Keep only the first 28 columns
        for ele in player:
            if i >= 28:
                break
            stat = Stat.Stat(self.header[i], ele)
            stats.append(stat)
            i += 1
        season_player = Player.Player(stats)
        self.season_players.append(season_player)
def GenStat(self, fn):
    with Cons.MT(fn, print_time=False):
        thrp = []
        fn0 = "%s/result/%s" % (os.path.dirname(__file__), fn)
        with open(fn0) as fo:
            for line in fo.readlines():
                if line.startswith("1+0 records in"):
                    continue
                if line.startswith("1+0 records out"):
                    continue
                if line.startswith("real"):
                    continue
                if line.startswith("user"):
                    continue
                if line.startswith("sys"):
                    continue
                # 134217728 bytes (134 MB) copied, 0.851289 s, 158 MB/s
                #m = re.match(r"\d+ bytes \(\d+ MB\) copied, (?P<lap_time>(\d|\.)+) s, .+", line)
                m = re.match(r"134217728 bytes \(134 MB\) copied, (?P<lap_time>(\d|\.)+) s, .+", line)
                if m:
                    #Cons.P(m.group("lap_time"))
                    # 134217728 bytes = 128 MiB per run, so elapsed seconds -> MiB/s
                    thrp.append(128.0 / float(m.group("lap_time")))
                    continue
                raise RuntimeError("Unexpected %s" % line)
        #Cons.P(len(thrp))
        Stat.GenStat(thrp, "%s/%s-cdf" % (_dn_stat, fn))

        # Throughput in time order
        fn_time_order = "%s/%s-time-order" % (_dn_stat, fn)
        with open(fn_time_order, "w") as fo:
            for t in thrp:
                fo.write("%s\n" % t)
        Cons.P("Created %s %d" % (fn_time_order, os.path.getsize(fn_time_order)))
def GenMemStatByHour(dn_log_job, exp_dt):
    #Cons.P("%s %s" % (dn_log_job, exp_dt))
    fn = "%s/procmon/%s" % (dn_log_job, exp_dt)
    if not os.path.exists(fn):
        fn_zipped = "%s.bz2" % fn
        if not os.path.exists(fn_zipped):
            raise RuntimeError("Unexpected: %s" % fn)
        Util.RunSubp("cd %s && bzip2 -dk %s > /dev/null"
                     % (os.path.dirname(fn_zipped), os.path.basename(fn_zipped)))
        if not os.path.exists(fn):
            raise RuntimeError("Unexpected")

    exp_begin_dt = datetime.datetime.strptime(exp_dt, "%y%m%d-%H%M%S.%f")

    # man proc: statm reports sizes in pages, hence the 4 KiB page-size factor
    hour_mems = {}
    with open(fn) as fo:
        for line in fo:
            t = line.strip().split()
            dt = datetime.datetime.strptime(t[0], "%y%m%d-%H%M%S")
            rss = int(t[2]) * 4096
            #Cons.P("%s %d" % (dt, rss))
            # Convert to time relative to the beginning of the experiment.
            # total_seconds() rather than .seconds, which wraps at 24 hours.
            rel_dt = dt - exp_begin_dt
            totalSeconds = int(rel_dt.total_seconds())
            h, remainder = divmod(totalSeconds, 3600)
            if h not in hour_mems:
                hour_mems[h] = []
            hour_mems[h].append(rss)

    hour_memstat = {}
    for h, mems in sorted(hour_mems.items()):
        hour_memstat[h] = Stat.Gen(mems)
    return hour_memstat
def _GetCpuStatByHour(fn_ycsb):
    (fn_dstat, num_stgdevs) = DstatLog.GetPlotFn(fn_ycsb)
    col_time = 17
    col_cpu_idle = 19
    col_cpu_sys = col_cpu_idle + 2
    col_cpu_user = col_cpu_idle + 3
    col_cpu_iowait = col_cpu_idle + 4
    #Cons.P(fn_dstat)

    # Bucketize CPU usage
    # {hour: [cpu_usage]}
    hour_cpuusage = {}
    with open(fn_dstat) as fo:
        for line in fo:
            if line.startswith("#"):
                continue
            line = line.strip()
            t = re.split(r" +", line)
            time0 = t[col_time - 1]
            cpu = 100.0 - float(t[col_cpu_idle - 1])
            #Cons.P("%s %s" % (time0, cpu))
            hour = int(time0.split(":")[0])
            if hour not in hour_cpuusage:
                hour_cpuusage[hour] = []
            hour_cpuusage[hour].append(cpu)

    hour_cpustat = {}
    for hour, cpu_usage in hour_cpuusage.items():
        r = Stat.Gen(cpu_usage)
        #Cons.P("%d %s" % (hour, r))
        hour_cpustat[hour] = r
    return hour_cpustat
def calcAnovaWave(self):
    # Calculates Anova on wave and/or GFP
    if self.AnovaCheck:
        self.Wave = Stat.Anova(self.H5, self.Mainframe)
        # Parametric analysis
        if self.AnovaParam:
            if self.AnalyseType in ["GFP Only", "Both"] and self.doAnovaParamGFP:
                self.Wave.Param(DataGFP=True)
                self.progressTxt.append('Parametric Anova (GFP) : %s' % self.Wave.elapsedTime)
            if self.AnalyseType in ["All Electrodes", "Both"] and self.doAnovaParamElect:
                self.Wave.Param()
                self.progressTxt.append('Parametric Anova (All Electrodes) : %s' % self.Wave.elapsedTime)
        # Non-parametric analysis
        else:
            if self.AnalyseType in ["GFP Only", "Both"] and self.doAnovaNonParamGFP:
                self.Wave.NonParam(self.AnovaIteration, DataGFP=True)
                self.progressTxt.append('Non-Parametric Anova (GFP) : %s' % self.Wave.elapsedTime)
            if self.AnalyseType in ["All Electrodes", "Both"] and self.doAnovaNonParamElect:
                self.Wave.NonParam(self.AnovaIteration)
                self.progressTxt.append('Non-Parametric Anova (All Electrodes) : %s' % self.Wave.elapsedTime)
        # Make sure the h5 files are always closed at the end
        self.cancel = self.Wave.cancel
        self.Wave.file.close()

    # Calculates PostHoc on wave and/or GFP
    if self.PostHocCheck:
        self.WavePostHoc = Stat.PostHoc(self.H5, self.Mainframe)
        # Parametric
        if self.PostHocParam:
            if self.AnalyseType in ["GFP Only", "Both"] and self.doPostHocParamGFP:
                self.WavePostHoc.Param(DataGFP=True)
                self.progressTxt.append('Parametric PostHoc (GFP) : %s' % self.WavePostHoc.elapsedTime)
            if self.AnalyseType in ["All Electrodes", "Both"] and self.doPostHocParamElect:
                self.WavePostHoc.Param()
                self.progressTxt.append('Parametric PostHoc (All Electrodes) : %s' % self.WavePostHoc.elapsedTime)
        # Non-parametric
        else:
            if self.AnalyseType in ["GFP Only", "Both"] and self.doPostHocNonParamGFP:
                self.WavePostHoc.NonParam(self.PostHocIteration, DataGFP=True)
                self.progressTxt.append('Non-Parametric PostHoc (GFP) : %s' % self.WavePostHoc.elapsedTime)
            if self.AnalyseType in ["All Electrodes", "Both"] and self.doPostHocNonParamElect:
                self.WavePostHoc.NonParam(self.PostHocIteration)
                self.progressTxt.append('Non-Parametric PostHoc (All Electrodes) : %s' % self.WavePostHoc.elapsedTime)
        # Make sure the h5 files are always closed at the end
        self.cancel = self.WavePostHoc.cancel
        self.WavePostHoc.file.close()

    # Multiple-testing correction and writing data
    if self.SpaceFile == '':
        self.SpaceFile = None
    Correction = PostStat.MultipleTestingCorrection(
        self.H5, self.Mainframe, TF=self.PtsConsec, Alpha=self.Alpha,
        SpaceCont=self.Clust, SpaceFile=self.SpaceFile)
    Correction.Calculation()
    self.Param = {'Anova': self.AnovaParam, 'PostHoc': self.PostHocParam}
    if self.AnalyseType in ["GFP Only", "Both"]:
        Writing = PostStat.WriteData(self.PathResult, self.H5, self.Param, DataGFP=True)
        Writing.StatistcalData(Correction.CorrectedMask)
        Writing.IntermediateResult()
        Writing.file.close()
    if self.AnalyseType in ["All Electrodes", "Both"]:
        Writing = PostStat.WriteData(self.PathResult, self.H5, self.Param, DataGFP=False)
        Writing.StatistcalData(Correction.CorrectedMask)
        Writing.IntermediateResult()
        Writing.file.close()
###
predictTrains = []
predictTests = []
dataAccuracy = []
testAccuracy = []
for i in range(5):
    sgd = SGD.SGD(r=best_r, epochs=best_epoch, W0=[0] * len(data[i][0]))
    sgd.fit(data[i], data_labels)
    predictTrains.append(sgd.predict(data[i]))
    predictTests.append(sgd.predict(testset[i]))
    dataAccuracy.append(Stat.F1_Score(sgd.predict(data[i]), data_labels))
    testAccuracy.append(Stat.F1_Score(sgd.predict(testset[i]), test_labels))
trainT = np.asarray(predictTrains).T.tolist()
testT = np.asarray(predictTests).T.tolist()

# F1-weighted vote over the five classifiers
predictTrain = []
for i in range(len(data[0])):
    probPos = 0
    probNeg = 0
    for j in range(5):
        if predictTrains[j][i] == 1:
            probPos += dataAccuracy[j]
        else:
            probNeg += dataAccuracy[j]
    if probPos > probNeg:
        # The source is truncated here; appending the voted label (+1/-1) is an assumption
        predictTrain.append(1)
    else:
        predictTrain.append(-1)
'class', Globals.euclidean)
data = result['aggregated_confusion_matrix']
list_accuracy = list(result[i]['class_stat']['accuracy'] for i in range(10))
print('Number of instances : ', result['number_instance'])
print('Number of Features : ', len(result['column_list']) - 1)
print('Classes : ', result['list_classes'])
print('Confusion Matrix for the dataset over 10 runs :')
for i in data.keys():
    print(i, ' ', end='')
    for j in data.keys():
        print(data[i][j], ' ', end='')
    print('')
print('Accuracy for 10 runs: ', list_accuracy)
print('Mean Accuracy : ', Stat.mean(list_accuracy))
print('Variance : ', Stat.variance(list_accuracy))
print('Standard Deviation : ', Stat.standard_deviation(list_accuracy))
plt.xlabel('sepal width in cm')
plt.ylabel('petal width in cm')
x, y, class_column_name = 'sepal width in cm', 'petal width in cm', result['class_column_name']
new_train_list = sorted(result['training_dataset'], key=lambda k: (float(k[y]), float(k[x])), reverse=True)
new_test_list = sorted(result['test_dataset'], key=lambda k: (float(k[y]), float(k[x])), reverse=True)
def main(inF, outF):
    # Use the command-line argument holding the file name
    file = inF
    # Read the input file, sheet by sheet
    try:
        dataClasses = pd.read_excel(file, sheet_name='Schedule')
    except Exception:
        raise Exception("Error: There is no table in your classroom file titled 'Schedule'.")
    try:
        dataRooms = pd.read_excel(file, sheet_name='Capacity')
    except Exception:
        raise Exception("Error: There is no table in your classroom file titled 'Capacity'.")
    try:
        dataBuild = pd.read_excel(file, sheet_name='Coords')
    except Exception:
        raise Exception("Error: There is no table in your classroom file titled 'Coords'.")

    # Convert the pandas data frames into raw values
    courses = dataClasses.values
    rooms = dataRooms.values
    buildings = dataBuild.values

    # Initialize lists to hold course, room, and building objects
    courseList = []
    roomList = []
    buildList = []
    subjectTobuilding = {}

    # Create the schedule object
    spring2020 = Schedule()

    # Import time slots into the schedule based on days,
    # separating slots per day pattern and avoiding repeats
    for i in dataClasses.Time:
        if (("mw" in i) or ("MW" in i)) and not (i in spring2020.mw):
            spring2020.mw.append(i.lower())
        if (("tt" in i) or ("TT" in i)) and not (i in spring2020.tt):
            spring2020.tt.append(i.lower())
        if (("MWF" in i) or ("mwf" in i)) and not (i in spring2020.mwf):
            spring2020.mwf.append(i.lower())

    # Create courses and add them to the course list
    for i in courses:
        # (self, subject, course, title, ver, sec, professor, time, cap)
        courseList.append(Course(i[0], str(i[1]), i[2], i[3], i[4], i[5], i[6].lower(), i[7]))

    # Convert the given time value into an easier-to-read format
    for i in courseList:
        if "mw" in i.time:
            if "mwf" in i.time:
                i.days = "Mon/Wed/Fri"
                temp = i.time.split("mwf")
                i.Mtime = convert_Time(temp)
            else:
                i.days = "Mon/Wed"
                temp = i.time.split("mw")
                i.Mtime = convert_Time(temp)
        if "tt" in i.time:
            i.days = "Tues/Thurs"
            temp = i.time.split("tt")
            i.Mtime = convert_Time(temp)

    # Create rooms and add them to the room list
    for i in rooms:
        roomList.append(Room(i[0], i[1]))

    # Create building objects and map each subject to its building
    for i in range(len(buildings)):
        buildList.append(Building(buildings[i][0], buildings[i][1], buildings[i][2], buildings[i][3]))
        subjectTobuilding[buildList[i].subject] = buildList[i].name

    # Warning file for error output during schedule generation
    warningTextFile = "warning.txt"
    fo = open(warningTextFile, "w")
    fo.write("Warnings:\n")
    fo.close()

    # Make 5 copies of the schedule, courseList, and roomList
    S1 = copy.deepcopy(spring2020)
    S1c = copy.deepcopy(courseList)
    S1r = copy.deepcopy(roomList)
    S2 = copy.deepcopy(spring2020)
    S2c = copy.deepcopy(courseList)
    S2r = copy.deepcopy(roomList)
    S3 = copy.deepcopy(spring2020)
    S3c = copy.deepcopy(courseList)
    S3r = copy.deepcopy(roomList)
    S4 = copy.deepcopy(spring2020)
    S4c = copy.deepcopy(courseList)
    S4r = copy.deepcopy(roomList)
    S5 = copy.deepcopy(spring2020)
    S5c = copy.deepcopy(courseList)
    S5r = copy.deepcopy(roomList)

    # First schedule: rooms and courses in the given order
    generate_schedule(S1, S1c, S1r, buildList, subjectTobuilding)
    # Set the main schedule to S1
    spring2020 = copy.deepcopy(S1)
    courseList = copy.deepcopy(S1c)

    # Second schedule: rooms in the given order, courses sorted by capacity, smallest first
    S2c.sort(key=lambda course: course.cap)
    generate_schedule(S2, S2c, S2r, buildList, subjectTobuilding)
    # If this schedule has fewer unscheduled classes than spring2020, make it spring2020
    if len(S2.unScheduled) < len(spring2020.unScheduled):
        spring2020 = copy.deepcopy(S2)
        courseList = copy.deepcopy(S2c)

    # Third schedule: rooms in the given order, courses sorted by capacity, largest first
    S3c.sort(key=lambda course: course.cap, reverse=True)
    generate_schedule(S3, S3c, S3r, buildList, subjectTobuilding)
    if len(S3.unScheduled) < len(spring2020.unScheduled):
        spring2020 = copy.deepcopy(S3)
        courseList = copy.deepcopy(S3c)

    # Fourth schedule: rooms and courses both sorted by capacity, smallest first
    S4r.sort(key=lambda room: room.cap)
    S4c.sort(key=lambda course: course.cap)
    generate_schedule(S4, S4c, S4r, buildList, subjectTobuilding)
    if len(S4.unScheduled) < len(spring2020.unScheduled):
        spring2020 = copy.deepcopy(S4)
        courseList = copy.deepcopy(S4c)

    # Fifth schedule: rooms sorted smallest first, courses sorted largest first
    S5c.sort(key=lambda course: course.cap, reverse=True)
    S5r.sort(key=lambda room: room.cap)
    generate_schedule(S5, S5c, S5r, buildList, subjectTobuilding)
    if len(S5.unScheduled) < len(spring2020.unScheduled):
        spring2020 = copy.deepcopy(S5)
        courseList = copy.deepcopy(S5c)

    # Sort spring2020 by time within each day
    for day in range(5):
        spring2020.solution[day].sort(key=lambda course: course.Mtime.hour)

    # Write the schedule to the output file
    generate_output(spring2020, courseList, outF)
    # print_schedule(spring2020)
    Stat.main(outF)
def __init__(self, exp_set_id, stg_dev):
    conf_sd = Conf.Get(exp_set_id)[stg_dev]
    t = conf_sd["jobid_expdt"].split("/")
    job_id = t[0]
    exp_dt = t[1]
    t = conf_sd["time_window"].split("-")
    exp_time_begin = t[0]
    exp_time_end = t[1]
    dn_log = Conf.GetDir("dn")
    dn_log_job = "%s/%s" % (dn_log, job_id)

    self.fn_out = "%s/ycsb-by-time-%s" % (Conf.GetOutDir(), exp_dt)
    if os.path.isfile(self.fn_out):
        return

    self.exp_begin_dt = datetime.datetime.strptime(exp_dt, "%y%m%d-%H%M%S.%f")
    #Cons.P(self.exp_begin_dt)

    with Cons.MT("Generating ycsb time-vs-metrics file for plot ..."):
        fn_log_ycsb = "%s/ycsb/%s-d" % (dn_log_job, exp_dt)
        # Unzip when the file is not there
        if not os.path.exists(fn_log_ycsb):
            fn_zipped = "%s.bz2" % fn_log_ycsb
            if not os.path.exists(fn_zipped):
                raise RuntimeError("Unexpected: %s" % fn_log_ycsb)
            Util.RunSubp("cd %s && bzip2 -dk %s > /dev/null"
                         % (os.path.dirname(fn_zipped), os.path.basename(fn_zipped)))
            if not os.path.exists(fn_log_ycsb):
                raise RuntimeError("Unexpected")

        mo_list = []
        line_params = None
        line_run = None
        with open(fn_log_ycsb) as fo:
            for line in fo:
                #Cons.P(line)
                # 2017-10-13 20:41:01:258 2 sec: 34 operations; 34 current ops/sec; est completion in 68 days 1 hours [READ: Count=28, Max=46943, Min=33,
                # Avg=32239.54, 90=45343, 99=46943, 99.9=46943, 99.99=46943] [INSERT: Count=8, Max=9343, Min=221, Avg=4660.88, 90=8695, 99=9343, 99.9=9343,
                # 99.99=9343]
                mo = re.match(r"\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d:\d\d\d (?P<rel_time>\d+) sec: \d+ operations; "
                              r"(?P<db_iops>(\d|\.)+) current ops\/sec; .*"
                              r"\[READ: Count=(?P<r_cnt>\d+), Max=(?P<r_max>\d+), Min=(?P<r_min>\d+), Avg=(?P<r_avg>(\d|\.)+),"
                              r" 90=(?P<r_90>\d+), 99=(?P<r_99>\d+), 99.9=(?P<r_999>\d+), 99.99=(?P<r_9999>\d+)\] "
                              r"\[INSERT: Count=(?P<w_cnt>\d+), Max=(?P<w_max>\d+), Min=(?P<w_min>\d+), Avg=(?P<w_avg>(\d|\.)+),"
                              r" 90=(?P<w_90>\d+), 99=(?P<w_99>\d+), 99.9=(?P<w_999>\d+), 99.99=(?P<w_9999>\d+)\] ",
                              line)
                if mo is not None:
                    # Integer division so h/m/s stay ints under Python 3
                    total_seconds = int(mo.group("rel_time"))
                    s = total_seconds % 60
                    total_seconds -= s
                    total_mins = total_seconds // 60
                    m = total_mins % 60
                    total_mins -= m
                    h = total_mins // 60
                    rel_time = "%02d:%02d:%02d" % (h, m, s)
                    mo_list.append((rel_time, mo))
                    continue
                if line.startswith("params = {"):
                    line_params = line
                    continue
                if line.startswith("run = {"):
                    line_run = line
                    continue

        cnt = 0
        db_iops = []
        r_cnt = r_min = r_max = r_90 = r_99 = r_999 = r_9999 = 0
        w_cnt = w_min = w_max = w_90 = w_99 = w_999 = w_9999 = 0
        r_avg = w_avg = 0.0
        for e in mo_list:
            rel_time = e[0]
            if (exp_time_begin < rel_time) and (rel_time < exp_time_end):
                mo = e[1]
                db_iops.append(float(mo.group("db_iops")))
                r_cnt += int(mo.group("r_cnt"))
                r_avg += float(mo.group("r_avg"))
                r_min += int(mo.group("r_min"))
                r_max += int(mo.group("r_max"))
                r_90 += int(mo.group("r_90"))
                r_99 += int(mo.group("r_99"))
                r_999 += int(mo.group("r_999"))
                r_9999 += int(mo.group("r_9999"))
                w_cnt += int(mo.group("w_cnt"))
                w_avg += float(mo.group("w_avg"))
                w_min += int(mo.group("w_min"))
                w_max += int(mo.group("w_max"))
                w_90 += int(mo.group("w_90"))
                w_99 += int(mo.group("w_99"))
                w_999 += int(mo.group("w_999"))
                w_9999 += int(mo.group("w_9999"))
                cnt += 1
        db_iops_stat = Stat.Gen(db_iops)

        with open(self.fn_out, "w") as fo_out:
            fo_out.write("# %s" % line_params)
            fo_out.write("# %s" % line_run)
            fo_out.write("\n")
            fo_out.write("# In the time range (%s, %s):\n" % (exp_time_begin, exp_time_end))
            fo_out.write("# db_iops.avg= %14f\n" % db_iops_stat.avg)
            fo_out.write("# db_iops.min= %14f\n" % db_iops_stat.min)
            fo_out.write("# db_iops.max= %14f\n" % db_iops_stat.max)
            fo_out.write("# db_iops._25= %14f\n" % db_iops_stat._25)
            fo_out.write("# db_iops._50= %14f\n" % db_iops_stat._50)
            fo_out.write("# db_iops._75= %14f\n" % db_iops_stat._75)
            fo_out.write("# r_cnt = %14f\n" % (float(r_cnt) / cnt))
            fo_out.write("# r_avg = %14f\n" % (float(r_avg) / cnt))
            fo_out.write("# r_min = %14f\n" % (float(r_min) / cnt))
            fo_out.write("# r_max = %14f\n" % (float(r_max) / cnt))
            fo_out.write("# r_90 = %14f\n" % (float(r_90) / cnt))
            fo_out.write("# r_99 = %14f\n" % (float(r_99) / cnt))
            fo_out.write("# r_999 = %14f\n" % (float(r_999) / cnt))
            fo_out.write("# r_9999 = %14f\n" % (float(r_9999) / cnt))
            fo_out.write("# w_cnt = %14f\n" % (float(w_cnt) / cnt))
            fo_out.write("# w_avg = %14f\n" % (float(w_avg) / cnt))
            fo_out.write("# w_min = %14f\n" % (float(w_min) / cnt))
            fo_out.write("# w_max = %14f\n" % (float(w_max) / cnt))
            fo_out.write("# w_90 = %14f\n" % (float(w_90) / cnt))
            fo_out.write("# w_99 = %14f\n" % (float(w_99) / cnt))
            fo_out.write("# w_999 = %14f\n" % (float(w_999) / cnt))
            fo_out.write("# w_9999 = %14f\n" % (float(w_9999) / cnt))
            fo_out.write("\n")

            fmt = "%8s" \
                  " %9.2f" \
                  " %6d %8.2f %3d %6d" \
                  " %6d %6d %6d %6d" \
                  " %6d %8.2f %3d %6d" \
                  " %6d %6d %6d %6d"
            header = Util.BuildHeader(fmt,
                                      "rel_time"
                                      " db_iops"
                                      " read_cnt read_lat_avg read_lat_min read_lat_max"
                                      " read_lat_90p read_lat_99p read_lat_99.9p read_lat_99.99p"
                                      " write_cnt write_lat_avg write_lat_min write_lat_max"
                                      " write_lat_90p write_lat_99p write_lat_99.9p write_lat_99.99p")
            i = 0
            for e in mo_list:
                rel_time = e[0]
                mo = e[1]
                if i % 40 == 0:
                    fo_out.write(header + "\n")
                fo_out.write((fmt + "\n") % (
                    rel_time, float(mo.group("db_iops")),
                    int(mo.group("r_cnt")), float(mo.group("r_avg")), int(mo.group("r_min")), int(mo.group("r_max")),
                    int(mo.group("r_90")), int(mo.group("r_99")), int(mo.group("r_999")), int(mo.group("r_9999")),
                    int(mo.group("w_cnt")), float(mo.group("w_avg")), int(mo.group("w_min")), int(mo.group("w_max")),
                    int(mo.group("w_90")), int(mo.group("w_99")), int(mo.group("w_999")), int(mo.group("w_9999"))))
                i += 1
        Cons.P("Created %s %d" % (self.fn_out, os.path.getsize(self.fn_out)))
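# Stat.Gen is used throughout these snippets. Judging by the attributes read
# above (avg, min, max, _25, _50, _75), it returns a summary-statistics object;
# a minimal stand-in sketch under that assumption (not the original Stat module):
import numpy as np

class StatGenSketch:
    def __init__(self, values):
        v = np.asarray(values, dtype=float)
        self.avg = float(v.mean())
        self.min = float(v.min())
        self.max = float(v.max())
        self._25 = float(np.percentile(v, 25))  # first quartile
        self._50 = float(np.percentile(v, 50))  # median
        self._75 = float(np.percentile(v, 75))  # third quartile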
def stat(filepath):
    info, err = Stat(filepath)
    if err:
        raise OSError(err.Error())
    return StatResult(info)
for epoch in [20]:  # ,10,15,25]:
    for g0 in [1.1, 1.01, 1.001]:
        tmp = []
        sgd = SGD_SVM.SGDSVM(C=C, ro=ro, epochs=epoch, W0=[0] * len(phiTrain[0]), gamma0=g0)
        # 5-fold cross-validation over the training set
        kfold = KFold.KFold(n_splits=5)
        for kf in kfold.split(phiTrain):
            train2 = [phiTrain[i] for i in kf[0]]
            train_label2 = [data_labels[i] for i in kf[0]]
            test2 = [phiTrain[i] for i in kf[1]]
            test_label2 = [data_labels[i] for i in kf[1]]
            sgd.fit(train2, train_label2)
            predict_tmp = sgd.predict(test2)
            tmp.append(Stat.Accuracy(predict_tmp, test_label2))
        if np.mean(tmp) > best_accuracy:
            best_accuracy = np.mean(tmp)
            best_C = C
            best_epoch = epoch
            best_g0 = g0
            best_ro = ro
###
print("mid cross-validation")
sgd = SGD_SVM.SGDSVM(C=best_C, ro=best_ro, epochs=best_epoch, W0=[0] * len(phiTrain[0]), gamma0=best_g0)
sgd.fit(phiTrain, train_label)
def __init__(self, fn_in, time_begin, time_end, overloaded):
    self.overloaded = overloaded

    # Unzip when the file is not there
    if not os.path.exists(fn_in):
        fn_zipped = "%s.bz2" % fn_in
        if not os.path.exists(fn_zipped):
            raise RuntimeError("Unexpected: %s" % fn_in)
        Util.RunSubp("cd %s && bzip2 -dk %s > /dev/null"
                     % (os.path.dirname(fn_zipped), os.path.basename(fn_zipped)))
        if not os.path.exists(fn_in):
            raise RuntimeError("Unexpected")
    #Cons.P(fn_in)

    mo_list = []
    line_params = None
    line_run = None
    with open(fn_in) as fo:
        for line in fo:
            #Cons.P(line)
            # 2017-10-13 20:41:01:258 2 sec: 34 operations; 34 current ops/sec; est completion in 68 days 1 hours [READ: Count=28, Max=46943, Min=33,
            # Avg=32239.54, 90=45343, 99=46943, 99.9=46943, 99.99=46943] [INSERT: Count=8, Max=9343, Min=221, Avg=4660.88, 90=8695, 99=9343, 99.9=9343,
            # 99.99=9343]
            mo = re.match(r"\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d:\d\d\d (?P<rel_time>\d+) sec: \d+ operations; "
                          r"(?P<db_iops>(\d|\.)+) current ops\/sec; .*"
                          r"\[READ: Count=(?P<r_cnt>\d+), Max=(?P<r_max>\d+), Min=(?P<r_min>\d+), Avg=(?P<r_avg>(\d|\.)+),"
                          r" 90=(?P<r_90>\d+), 99=(?P<r_99>\d+), 99.9=(?P<r_999>\d+), 99.99=(?P<r_9999>\d+)\] "
                          r"\[INSERT: Count=(?P<w_cnt>\d+), Max=(?P<w_max>\d+), Min=(?P<w_min>\d+), Avg=(?P<w_avg>(\d|\.)+),"
                          r" 90=(?P<w_90>\d+), 99=(?P<w_99>\d+), 99.9=(?P<w_999>\d+), 99.99=(?P<w_9999>\d+)\] ",
                          line)
            if mo is None:
                continue
            # Integer division so h/m/s stay ints under Python 3
            total_seconds = int(mo.group("rel_time"))
            s = total_seconds % 60
            total_seconds -= s
            total_mins = total_seconds // 60
            m = total_mins % 60
            total_mins -= m
            h = total_mins // 60
            rel_time = "%02d:%02d:%02d" % (h, m, s)
            if (time_begin <= rel_time) and (rel_time <= time_end):
                mo_list.append((rel_time, mo))
    if len(mo_list) == 0:
        raise RuntimeError("Unexpected. Check file [%s]" % fn_in)

    cnt = 0
    db_iops = []
    r_cnt = r_min = r_max = r_90 = r_99 = r_999 = r_9999 = 0
    w_cnt = w_min = w_max = w_90 = w_99 = w_999 = w_9999 = 0
    r_avg = w_avg = 0.0
    for e in mo_list:
        rel_time = e[0]
        mo = e[1]
        db_iops.append(float(mo.group("db_iops")))
        r_cnt += int(mo.group("r_cnt"))
        r_avg += float(mo.group("r_avg"))
        r_min += int(mo.group("r_min"))
        r_max += int(mo.group("r_max"))
        r_90 += int(mo.group("r_90"))
        r_99 += int(mo.group("r_99"))
        r_999 += int(mo.group("r_999"))
        r_9999 += int(mo.group("r_9999"))
        w_cnt += int(mo.group("w_cnt"))
        w_avg += float(mo.group("w_avg"))
        w_min += int(mo.group("w_min"))
        w_max += int(mo.group("w_max"))
        w_90 += int(mo.group("w_90"))
        w_99 += int(mo.group("w_99"))
        w_999 += int(mo.group("w_999"))
        w_9999 += int(mo.group("w_9999"))
        cnt += 1

    self.db_iops_stat = Stat.Gen(db_iops)
    self.r_cnt = r_cnt
    self.r_avg = float(r_avg) / cnt
    self.r_min = float(r_min) / cnt
    self.r_max = float(r_max) / cnt
    self.r_90 = float(r_90) / cnt
    self.r_99 = float(r_99) / cnt
    self.r_999 = float(r_999) / cnt
    self.r_9999 = float(r_9999) / cnt
    self.w_cnt = float(w_cnt) / cnt
    self.w_avg = float(w_avg) / cnt
    self.w_min = float(w_min) / cnt
    self.w_max = float(w_max) / cnt
    self.w_90 = float(w_90) / cnt
    self.w_99 = float(w_99) / cnt
    self.w_999 = float(w_999) / cnt
    self.w_9999 = float(w_9999) / cnt
def KMeans(ds, k):
    number_centroids = k
    x = MatrixHandler.transposeMatrix(ds)
    random_centroids = []
    # For each centroid, pick a random sample and copy its coordinates
    for i in range(0, number_centroids):
        random_sample = randrange(len(x[0]))
        dim = []
        # For each dimension of x
        for j in range(len(x)):
            dim.append(x[j][random_sample])
        random_centroids.append(dim)
    random_centroids = np.asarray(random_centroids)
    # Get clusters
    cluster = get_clusters(random_centroids, k, ds)
    # t = x[0]
    # y = x[1]
    # classes = cluster
    # unique = list(set(classes))
    # colors = [plt.cm.jet(float(i)/max(unique)) for i in unique]
    # for i, u in enumerate(unique):
    #     xi = [t[j] for j in range(len(t)) if classes[j] == u]
    #     yi = [y[j] for j in range(len(t)) if classes[j] == u]
    #     plt.scatter(xi, yi, c=colors[i], label=str(u))
    # plt.legend()
    # random_centroids = MatrixHandler.transposeMatrix(random_centroids)
    # plt.scatter(random_centroids[0], random_centroids[1], color='red')
    # plt.show()

    # Iterate until the centroids stop moving
    last_centroids = random_centroids
    is_eq_last = False
    while not is_eq_last:
        new_centroids = []
        for c in Stat.unique(cluster):
            new_centroids.append(NDMean(ds[np.asarray(cluster) == c]).tolist())
        new_centroids = np.asarray(new_centroids)
        is_eq_last = np.array_equal(new_centroids, last_centroids)
        last_centroids = new_centroids
        cluster = get_clusters(new_centroids, k, ds)
        # t = x[0]
        # y = x[1]
        # classes = cluster
        # unique = list(set(classes))
        # colors = [plt.cm.jet(float(i)/max(unique)) for i in unique]
        # for i, u in enumerate(unique):
        #     xi = [t[j] for j in range(len(t)) if classes[j] == u]
        #     yi = [y[j] for j in range(len(t)) if classes[j] == u]
        #     plt.scatter(xi, yi, c=colors[i], label=str(u))
        # plt.legend()
        # new_centroids = MatrixHandler.transposeMatrix(new_centroids)
        # plt.scatter(new_centroids[0], new_centroids[1], color='red')
        # plt.show()
    # print(ds)
    # print(cluster)
    return np.append(ds, np.asarray(cluster).reshape(len(cluster), 1), axis=1), last_centroids
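# A short usage sketch for the KMeans function above, assuming ds is a 2-D
# numpy array of samples (rows) by features (columns). The synthetic data
# here is illustrative, not from the original project.
import numpy as np

np.random.seed(0)
ds = np.vstack([np.random.randn(50, 2) + (0, 0),
                np.random.randn(50, 2) + (5, 5)])
labeled_ds, centroids = KMeans(ds, k=2)
# labeled_ds has the cluster id appended as a last column; centroids is k x d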
def checkLandscape(path):
    # test_imagepath = path
    test_hist = returnHistogram(path)
    test_sky = returnSky(path)
    part_t = 0
    iteration = 0
    neurons = []
    neurons_sky = []
    for i in range(neurons_amount):
        neurons.append(Neuron(input_neuron_data, n, 1, np.array([1])))
    for i in range(neurons_amount):
        neurons_sky.append(Neuron(input_neuron_data, n, 1, np.array([1])))
    hist_count_out = Neuron(input_neuron_data, n, neurons_amount, np.ones(neurons_amount))
    sky_count_out = Neuron(input_neuron_data, n, neurons_amount, np.ones(neurons_amount))
    hist_out = Neuron(input_neuron_data, n, 1, np.array([1]))
    sky_out = Neuron(input_neuron_data, n, 1, np.array([1]))
    out_positive = Neuron(input_neuron_data, n, 1, np.array([1]))
    out_negative = Neuron(input_neuron_data, n, 1, np.array([1]))
    isLandscape = False
    stat = Stat(image_list_count)
    stat.neuron_amount = neurons_amount
    stat.StartTimer()
    for t in range(T - 1):
        if t % single_iteration_time == 0 and t < T - end_spike_time:
            part_t = 0
            # model_hist = returnHistogram(image_list[iteration])
            # model_sky = returnSky(image_list[iteration])
            model_hist = hist_data[iteration]
            model_sky = sky_data[iteration]
            # print("iteration: " + str(iteration))
            iteration += 1
        for i in range(0, neurons_amount):
            if test_hist[i] == part_t:
                neurons[i].setCurrent(2, t, 0)
                stat.hist_fired += 1
                stat.hist_fired_tab[iteration] += 1
                #print("hist " + str(t) + " " + str(i))
            neurons[i].calc(t)
        for i in range(0, neurons_amount):
            if test_sky[i] == part_t:
                neurons_sky[i].setCurrent(2, t, 0)
                stat.sky_fired += 1
                stat.sky_fired_tab[iteration] += 1
                #print("sky " + str(t) + " " + str(i))
            neurons_sky[i].calc(t)
        # Check fired
        if t > 10 and t < T - end_spike_time:
            part_t_back = part_t - 11
            for j in range(neurons_amount):
                if (neurons[j].fired == t
                        and model_hist[j] > part_t_back - hist_count_offset
                        and model_hist[j] < part_t_back + hist_count_offset):
                    hist_count_out.setCurrent(1, t, j)
                    # print("hist fired " + str(iteration) + " " + str(t) + " " + str(j))
            for j in range(neurons_amount):
                if (neurons_sky[j].fired == t
                        and model_sky[j] > part_t_back - sky_count_offset
                        and model_sky[j] < part_t_back + sky_count_offset):
                    sky_count_out.setCurrent(1, t, j)
                    # print("sky fired " + str(iteration) + " " + str(t) + " " + str(j))
        hist_count_out.calc(t)
        sky_count_out.calc(t)
        # Last neuron
        if part_t > single_iteration_time - 50 and part_t < single_iteration_time - 45 and hist_count_out.u_step[t] > treshold:
            hist_out.setCurrent(1, t, 0)
        else:
            hist_out.setCurrent(0, t, 0)
        hist_out.calc(t)
        if part_t > single_iteration_time - 30 and part_t < single_iteration_time - 25 and sky_count_out.u_step[t] > treshold_sky:
            sky_out.setCurrent(1, t, 0)
        else:
            sky_out.setCurrent(0, t, 0)
        sky_out.calc(t)
        if part_t > single_iteration_time - 10 and part_t < single_iteration_time - 5:
            sky_count_out.u_step[t + 1] = sky_count_out.u[t]
            hist_count_out.u_step[t + 1] = hist_count_out.u[t]
        if t > T - 20 and t < T - 15:
            if T - 19 == t:
                print(" HIST u_step: " + str(hist_out.u_step[t]) + " SKY u_step: " + str(sky_out.u_step[t]))
            if sky_out.u_step[t] > out_sky_treshold and hist_out.u_step[t] > out_treshold:
                # print("last neuron fired")
                stat.isLandscape = True
                out_positive.setCurrent(1, t, 0)
            else:
                # print("last neuron not fired")
                out_negative.setCurrent(1, t, 0)
        else:
            out_positive.setCurrent(0, t, 0)
            out_negative.setCurrent(0, t, 0)
        out_positive.calc(t)
        out_negative.calc(t)
        part_t += 1
    stat.EndTimer()
    return stat
def addJogo(self, jogo):
    if self.ateConcurso == -1:
        self.setAteConcurso()
    self.workJogos.append(jogo)
    self.histG = Stat.makeHistogram(self.workJogos)
    self.ateConcurso += 1
def WriteLat(self):
    if self.w_stat is not None:
        return self.w_stat
    with Cons.MT("Generating write latency stat ..."):
        self.w_stat = Stat.Gen(self.w_raw)
        return self.w_stat
def ReadLat(self):
    if self.r_stat is not None:
        return self.r_stat
    with Cons.MT("Generating read latency stat ..."):
        self.r_stat = Stat.Gen(self.r_raw)
        return self.r_stat
def setAteConcurso(self, ateConcurso=-1):
    self.ateConcurso = ateConcurso
    self.workJogos = self.jogosObj.getJogosAteConcurso(ateConcurso)
    self.histG = Stat.makeHistogram(self.workJogos)
    self.ateConcurso = len(self.workJogos)
def getInfo(self):
    if self.scoresPopulated:
        return
    newResponse = requests.get(self.url)
    newSoup = BeautifulSoup(newResponse.text, 'html.parser')
    table = newSoup.find('table')
    rows = table.find_all('tr')
    results = []
    # Skip the first row, which is useless for our purposes.
    for i in range(1, len(rows)):
        table_headers = rows[i].find_all('th')
        if table_headers:
            if i == 1:
                result = ["Rank", "Date", "Game #", "Age", "Team", "", "Opp", "Result"]
                for j in range(len(result), len(table_headers)):
                    label = str(table_headers[j])
                    #print(label[16:].find("\""), label)
                    # Pull the label out of the first quoted attribute value
                    startIndex = label.find("\"") + 1
                    endIndex = label[startIndex:].find("\"") + startIndex
                    trueLabel = label[startIndex:endIndex]
                    result += [trueLabel]
                results.append(result)
            else:
                for headers in table_headers:
                    results.append([headers.get_text()])
        table_data = rows[i].find_all('td')
        if table_data:
            results.append([data.get_text() for data in table_data])
    #results = results[1:len(results)]
    # Merge each header/data row pair into a single row
    ind = 0
    for ind in range(0, len(results)):
        #print(ind, ":", len(results[ind]), ":", results[ind])
        if ind % 2 == 1:
            results[int((ind + 2) / 2)] = results[ind] + results[ind + 1]
    results = results[0:int(ind / 2)]
    statNames = results[0]
    self.valueList = []
    for j in range(0, len(statNames)):
        curStat = Stat.getStat(statNames[j])
        self.valueList += [curStat.value]
        statNames[j] = curStat
    # Score each row as the value-weighted sum of its integer stats
    for i in range(1, len(results)):
        score = 0
        for j in range(0, len(results[i])):
            curr = results[i][j]
            if curr.isdigit() or (len(curr) > 0 and curr[0] == "-" and curr[1:].isdigit()):
                score += self.valueList[j] * int(results[i][j])
        self.addScore(i, round(score, 2))
    self.data = results
    self.cleanUp()
    self.getFanTable()
    self.scoresPopulated = True
def main():
    path = "/Users/u15672269/stat"
    # "однородность.xls" = "homogeneity.xls"; the file name is kept as-is
    data_path = "/Users/u15672269/Desktop/For_Kseniya/однородность.xls"
    # "Report on test-item quality metrics for the Informatics course, academic year 2018-2019, semester 1"
    title = "Отчет о показателях качества тестовых заданий по курсу Информатика 2018-2019 учебного года 1 семестра"
    KO_I = True
    KO_II = True
    correlation = True
    report = Document()
    report.add_heading(title, 0)
    if KO_I or KO_II or correlation:
        dictionary = DataReader.read_dictionary_from_excel(data_path)
        data = DataReader.read_raw_data_from_excel(data_path, dictionary)
        data_KO = []
        keys = []
        # The set of questions that make up the test
        test = {}
        for i in data:
            if i[2] != '':
                question = dictionary[i[0]][0]
                val = test.get(question[0])
                if val is None:
                    test[question[0]] = list()
                    test[question[0]].append(question[1])
                else:
                    if question[1] not in test[question[0]]:
                        test[question[0]].append(question[1])
                key = (question, i[1], i[2])
                if key not in keys:
                    count = sum(elem[0] == i[0] and elem[1] == key[1] and elem[2] == key[2] for elem in data)
                    data_KO.append([i[0], i[1], i[2], count, i[4], i[5]])
                    keys.append(key)
        print("ok")
    if KO_I:
        print("KO_I processing started")
        formulation_stat = Stat.get_question_formulation_stat(
            Stat.count_formulation_stat(data_KO, dictionary))
        formulation_homogeneity = {}
        for key, question_stat in formulation_stat.items():
            formulation_homogeneity[key] = Stat.test_formulation_homogeneity(question_stat)
        DataPrinter.create_report_KO_I(report, formulation_homogeneity, path)
        print("KO_I processing finished")
    if KO_II:
        print("KO_II processing started")
        distractor_frequency_stat = Stat.get_distractor_frequency_stat(data_KO, dictionary)
        distractor_homogeneity = Stat.test_distractor_homogeneity(distractor_frequency_stat, 0.05, 0, 100)
        DataPrinter.create_report_KO_II(report, distractor_frequency_stat, distractor_homogeneity, path)
        print("KO_II processing finished")
    if correlation:
        print("correlation processing started")
        correlation_stat = Stat.get_correlation_matrix(
            test, Stat.group_stat_by_student(data, dictionary))
        DataPrinter.create_report_correlation(report, correlation_stat, path)
        print("correlation processing finished")
    report.save(os.path.join(path, '{}.docx'.format(title)))
    return
def calcAnovaIS(self):
    """TODO: implement the checks for rerun"""
    # Calculates Anova on inverse space (IS)
    if self.AnovaCheck:
        self.IS = Stat.Anova(self.H5, self.Mainframe)
        # Parametric analysis
        if self.AnovaParam:
            if self.doAnovaParamIS:
                self.IS.Param()
                self.progressTxt.append('Parametric Anova (IS) : %s' % self.IS.elapsedTime)
        # Non-parametric analysis
        else:
            if self.doAnovaNonParamIS:
                self.IS.NonParam(self.AnovaIteration)
                self.progressTxt.append('Non-Parametric Anova (IS) : %s' % self.IS.elapsedTime)
        # Make sure the h5 files are always closed at the end
        self.IS.file.close()
        self.cancel = self.IS.cancel

    # Calculates PostHoc on inverse space (IS)
    if self.PostHocCheck:
        self.ISPostHoc = Stat.PostHoc(self.H5, self.Mainframe)
        # Parametric analysis
        if self.PostHocParam:
            if self.doPostHocParamIS:
                self.ISPostHoc.Param()
                self.progressTxt.append('Parametric PostHoc (IS) : %s' % self.ISPostHoc.elapsedTime)
        # Non-parametric analysis
        else:
            if self.doPostHocNonParamIS:
                self.ISPostHoc.NonParam(self.PostHocIteration)
                self.progressTxt.append('Non-Parametric PostHoc (IS) : %s' % self.ISPostHoc.elapsedTime)
        # Make sure the h5 files are always closed at the end
        self.ISPostHoc.file.close()
        self.cancel = self.ISPostHoc.cancel

    # Multiple testing and writing data; post-stat analysis, i.e. mathematical morphology, then write
    if self.SpaceFile == '':
        self.SpaceFile = None
    Correction = PostStat.MultipleTestingCorrection(
        self.H5, self.Mainframe, TF=self.PtsConsec, Alpha=self.Alpha,
        SpaceCont=self.Clust, SpaceFile=self.SpaceFile)
    Correction.Calculation()
    self.Param = {'Anova': self.AnovaParam, 'PostHoc': self.PostHocParam}
    Writing = PostStat.WriteData(self.PathResult, self.H5, self.Param, DataGFP=False)
    Writing.StatistcalData(Correction.CorrectedMask)
    Writing.IntermediateResult()
    Writing.file.close()
print("best gamma0:", best_g0) ''' ### sgd = SGD_SVM.SGDSVM(C=best_C, ro=best_ro, epochs=best_epoch, W0=[0] * len(data[0]), gamma0=best_g0) sgd.fit(data, data_labels) predictTrain = sgd.predict(data) predictTest = sgd.predict(testset) print("Accuracy for training set:") print(Stat.Accuracy(predictTrain, data_labels)) print("F1 score for training set:") print(Stat.F1_Score(predictTrain, data_labels)) print("Precision for training set:") print(Stat.Precision(predictTrain, data_labels)) print("Recall for training set:") print(Stat.Recall(predictTrain, data_labels)) print("Accuracy for test set") print(Stat.Accuracy(predictTest, test_labels)) print("F1 score for test set") print(Stat.F1_Score(predictTest, test_labels))
data = result['aggregated_confusion_matrix']
list_accuracy = list(result[i]['class_stat']['accuracy'] for i in range(10))
print('Number of instances : ', result['number_instance'])
print('Number of Features : ', len(result['column_list']) - 1)
print('Classes : ', result['list_classes'])
print('Confusion Matrix for the dataset over 10 runs :')
for i in data.keys():
    print(i, ' ', end='')
    for j in data.keys():
        print(data[i][j], ' ', end='')
    print('')
print('Accuracy for 10 runs: ', list_accuracy)
print('Mean Accuracy : ', Stat.mean(list_accuracy))
print('Variance : ', Stat.variance(list_accuracy))
print('Standard Deviation : ', Stat.standard_deviation(list_accuracy))
plt.xlabel('sepal width in cm')
plt.ylabel('petal width in cm')
x, y, class_column_name = 'sepal width in cm', 'petal width in cm', result['class_column_name']
new_train_list = sorted(result['training_dataset'], key=lambda k: (float(k[y]), float(k[x])), reverse=True)
new_test_list = sorted(result['test_dataset'], key=lambda k: (float(k[y]), float(k[x])), reverse=True)
plt.axis([0, 5, 0, 3])
for i in new_train_list:
    if i[class_column_name] == 'Iris-setosa':
        ro, = plt.plot(i[x], i[y], 'ro')