Example no. 1
0
    def GenStat(self, fn):
        with Cons.MT(fn, print_time=False):
            lap_times = []
            with open(fn) as fo:
                for line in fo.readlines():
                    line = line.rstrip()
                    if len(line) == 0:
                        continue
                    if line.startswith("#"):
                        continue

                    # 4 KiB from /mnt/local-ssd0/ioping-test-data (ext4 /dev/xvdb): request=1 time=219.1 us
                    # 4 KiB from /mnt/local-ssd0/ioping-test-data (ext4 /dev/xvdb): request=394 time=1.51 ms
                    m = re.match(r"(?P<lap_time>(\d|\.)+ (us|ms))", line)
                    if m:
                        lt = m.group("lap_time")
                        if lt.endswith(" us"):
                            lt = float(lt[:-3])
                        elif lt.endswith(" ms"):
                            lt = (float(lt[:-3]) * 1000)
                        lap_times.append(lt)
                        continue

                    raise RuntimeError("Unexpected [%s]" % line)
            #Cons.P(len(lap_times))
            fn_cdf = "%s/%s-cdf" % (_dn_output, os.path.basename(fn))
            self.fns_cdf.append(fn_cdf)
            Stat.GenStat(lap_times, fn_cdf)
Example no. 2
0
def GetNumAccessesStat():
    fn_out = "%s/cdf-youtube-accesses-per-co" % Conf.DnOut()
    if os.path.exists(fn_out):
        return fn_out

    num_accesses = []
    fn_in = Conf.GetFn("video_accesses_by_COs")
    with open(fn_in) as fo:
        while True:
            line = fo.readline()
            if len(line) == 0:
                break

            line = line.strip()
            if len(line) == 0:
                continue
            if line[0] == "#":
                continue

            # 4 34.3305 -111.091 13
            t = line.split(" ")
            if len(t) != 4:
                raise RuntimeError("Unexpected: [%s]" % line)
            n = int(t[3])
            #Cons.P(n)
            num_accesses.append(n)

            for j in range(n):
                if len(fo.readline()) == 0:
                    raise RuntimeError("Unexpected")

    r = Stat.Gen(num_accesses, fn_out)
    #Cons.P(r)

    return fn_out
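
A note on the loop structure above: the reader uses explicit readline() calls rather than "for line in fo" because it must also consume the n access lines that follow each header line, and under Python 2 mixing file iteration with readline() raises a ValueError. Under Python 3 the inner skip could be written with itertools.islice; a minimal sketch under that assumption:

import itertools

def skip_detail_lines(fo, n):
    # Consume and discard the n detail lines that follow a header line.
    consumed = sum(1 for _ in itertools.islice(fo, n))
    if consumed != n:
        raise RuntimeError("Unexpected: truncated record")
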
Example no. 3
0
def add_book_to_database(path):
    filepath = os.path.abspath(os.path.join(root, path))
    print("Adding file " + path)
    # Check the file extension
    if filepath.endswith(".pdf"):
        # Read the PDF file
        pdf = parser.PdfReader(filepath)
        # Retrieve the document metadata
        author = pdf.getAuthor()
        title = pdf.getTitle()
        if is_valid(author, title) and not db.book_is_in_database(title, author):
            # Extract the text
            try:
                text = pdf.extractText()
            except Exception:
                pass
            else:
                # Get the TF of each word
                occurences = text.getOccurences()
                tfs = st.tf(text.getNumberOfWords(), occurences)
                # Add the book to the database
                db.add_book_to_database(title, author, tfs)

                # Save the changes
                db.save_database()

                # Print the current number of books in the database
                print("Number of books in the database: " + str(db.number_books()))
Example no. 4
0
def _GetMemStatByHourFromDstat(fn_ycsb):
    fn_dstat = _GenDstat(fn_ycsb)

    col_time = 21
    col_mem_buff = 13
    #col_mem_cache = 14

    #Cons.P(fn_dstat)
    # Bucketize CPU usage
    #   {hour: [mem_usage]}
    hour_memusage = {}
    with open(fn_dstat) as fo:
        for line in fo:
            if line.startswith("#"):
                continue
            line = line.strip()
            t = re.split(r" +", line)
            time0 = t[col_time - 1]
            mem_buff = int(t[col_mem_buff - 1])
            #Cons.P("%s %d" % (time0, mem_buff))
            hour = int(time0.split(":")[0])
            if hour not in hour_memusage:
                hour_memusage[hour] = []
            hour_memusage[hour].append(mem_buff)

    hour_memstat = {}
    for hour, mem_usage in hour_memusage.iteritems():
        r = Stat.Gen(mem_usage)
        #Cons.P("%d %s" % (hour, r))
        hour_memstat[hour] = r
    return hour_memstat
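
The "if hour not in hour_memusage" initialization pattern recurs in several of these examples; collections.defaultdict expresses the same bucketing more directly. A minimal equivalent sketch, assuming the same dstat column layout:

import collections
import re

def bucketize_by_hour(fn_dstat, col_time=21, col_val=13):
    # Group one dstat column's values by the hour of the timestamp column.
    hour_vals = collections.defaultdict(list)
    with open(fn_dstat) as fo:
        for line in fo:
            if line.startswith("#"):
                continue
            t = re.split(r" +", line.strip())
            hour = int(t[col_time - 1].split(":")[0])
            hour_vals[hour].append(int(t[col_val - 1]))
    return hour_vals
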
Example no. 5
0
 def __init__(self, standard2LetterName):
   global instantiatedCount
   Base.__init__(self, standard2LetterName)
   self.jogos = []
   self.i = 0  # this is the nDoConc minus 1 pointer
   self.getJogosFromDB()
   self.histG = Stat.makeHistogram(self.jogos)
   self.initializeHistGOfHistG()
Example no. 6
0
    def GenStat(self, fn):
        with Cons.MT(fn, print_time=False):
            lap_times = []
            fn0 = "%s/result/%s" % (os.path.dirname(__file__), fn)
            with open(fn0) as fo:
                for line in fo.readlines():
                    line = line.rstrip()
                    if len(line) == 0:
                        continue

                    # 4 KiB from /mnt/local-ssd0/ioping-test-data (ext4 /dev/xvdb): request=1 time=219.1 us
                    # 4 KiB from /mnt/local-ssd0/ioping-test-data (ext4 /dev/xvdb): request=394 time=1.51 ms
                    m = re.match(
                        r"4 KiB from /mnt/.+/ioping-test-data \(ext4 /dev/xvd.\): request=\d+ time=(?P<lap_time>(\d|\.)+ (us|ms))",
                        line)
                    if m:
                        lt = m.group("lap_time")
                        if lt.endswith(" us"):
                            lt = float(lt[:-3])
                        elif lt.endswith(" ms"):
                            lt = (float(lt[:-3]) * 1000)
                        lap_times.append(lt)
                        continue

                    # --- /mnt/local-ssd0/ioping-test-data (ext4 /dev/xvdb) ioping statistics ---
                    if re.match(
                            r"--- /mnt/.+/ioping-test-data \(ext4 /dev/xvd.\) ioping statistics ---",
                            line):
                        continue

                    # 1 k requests completed in 175.1 ms, 3.91 MiB read, 5.71 k iops, 22.3 MiB/s
                    # 1 k requests completed in 6.06 s, 3.91 MiB read, 164 iops, 659.8 KiB/s
                    if re.match(
                            r"\d+ k requests completed in .+ (min|s|ms|), .+ MiB read, .+ iops, .+ (K|M)iB/s",
                            line):
                        continue

                    # min/avg/max/mdev = 146.9 us / 175.1 us / 1.77 ms / 79.6 us
                    if re.match(
                            r"min/avg/max/mdev = .+ (u|m)s / .+ (u|m)s / .+ (u|m)s / .+ (u|m)s",
                            line):
                        continue

                    raise RuntimeError("Unexpected [%s]" % line)
            #Cons.P(len(lap_times))
            Stat.GenStat(lap_times, "%s/%s-cdf" % (_dn_stat, fn))

            # Throughput in the time order
            fn_time_order = "%s/%s-time-order" % (_dn_stat, fn)
            with open(fn_time_order, "w") as fo:
                for t in lap_times:
                    fo.write("%s\n" % t)
            Cons.P("Created %s %d" %
                   (fn_time_order, os.path.getsize(fn_time_order)))
Example no. 7
0
def recreate():
  print 'recreate() sql tables'
  for whichDB in fSql.DBCONSTANTS:
    #if whichDB == 2:
      #continue
    print 'recreate() sql tables for db=', whichDB
    dbObj = fSql.getDBObj(whichDB)
    if not dbObj:
      continue
    if dbObj.whichDB != whichDB:
      # This can happen because getDBObj() falls back to a Sqlite DB object when MySQL is not available (e.g., the MySQLdb module is missing or the server is offline)
      continue
    dbObj.openConnection()
    conn = dbObj.conn
    createTablesWithConn(conn, whichDB)
    conn.close()
    del dbObj
  for jogoTipo in ['lf','ms']:
    hu.doHistoricoUpdater(jogoTipo)
    Stat.processDBStats(jogoTipo)
Example no. 8
0
 def __init__(self, identifier, args):
     self.identifier = identifier
     self.descriptors = Descriptor.descriptors()
     self.attributes = Attribute.attributes()
     self.stats = Stat.stats()
     for a in args:
         as1 = a.split(':')
         category = as1[0]
         as2 = as1[1].split('=')
         command = as2[0]
         args = as2[1]
         self.apply(category, command, args)
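
Each argument is parsed as "category:command=value". Note that rebinding args inside the loop shadows the constructor parameter; iteration continues over the original list, but a distinct name would be clearer. A hedged illustration with a made-up argument string:

a = "attribute:strength=10"        # hypothetical argument
category, rest = a.split(':')      # "attribute", "strength=10"
command, value = rest.split('=')   # "strength", "10"
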
Example no. 9
0
def _GetCpuStatByHour(fn_ycsb):
    fn_dstat = _GenDstat(fn_ycsb)

    col_time = 17

    col_cpu_idle = 19
    col_cpu_sys = col_cpu_idle + 2
    col_cpu_user = col_cpu_idle + 3
    col_cpu_iowait = col_cpu_idle + 4
    # With SSTable organization computation, there is less CPU usage and a bit more iowait time.
    #   Puzzling. Can't explain why the CPU usage is lower when SSTable organization computation is on
    #   The slightly increased iowait time towards the end might be from the increased amount of log and the overhead of zipping and uploading them.
    which_cpu = "overall"
    #which_cpu = "user"
    #which_cpu = "user+kernel"
    #which_cpu = "iowait"

    #Cons.P(fn_dstat)
    # Bucketize CPU usage
    #   {hour: [cpu_usage]}
    hour_cpuusage = {}
    with open(fn_dstat) as fo:
        for line in fo:
            if line.startswith("#"):
                continue
            line = line.strip()
            t = re.split(r" +", line)
            time0 = t[col_time - 1]

            if which_cpu == "overall":
                cpu = 100.0 - float(t[col_cpu_idle - 1])
            elif which_cpu == "user":
                cpu = float(t[col_cpu_user - 1])
            elif which_cpu == "user+kernel":
                cpu = float(t[col_cpu_user - 1]) + float(t[col_cpu_sys - 1])
            elif which_cpu == "iowait":
                cpu = float(t[col_cpu_iowait - 1])
            else:
                raise RuntimeError("Unexpected")

            #Cons.P("%s %s" % (time0, cpu))
            hour = int(time0.split(":")[0])
            if hour not in hour_cpuusage:
                hour_cpuusage[hour] = []
            hour_cpuusage[hour].append(cpu)

    hour_cpustat = {}
    for hour, cpu_usage in hour_cpuusage.iteritems():
        r = Stat.Gen(cpu_usage)
        #Cons.P("%d %s" % (hour, r))
        hour_cpustat[hour] = r
    return hour_cpustat
Example no. 10
0
def scatter_between_class(ds, class_mean, grand_mean):
    classes = Stat.unique(mh.getColumn(ds, len(ds[0]) - 1))
    sb = np.zeros((len(ds[0]) - 1, len(ds[0]) - 1))

    for i in range(len(class_mean)):
        transposta = mh.transposeMatrix(ds.copy())
        transposta = np.array(transposta)
        normal = ds[transposta[len(ds[0]) - 1] == classes[i]]

        meanc = np.asarray(class_mean[i]).reshape(4, 1)
        meang = np.asarray(grand_mean).reshape(4, 1)
        sb += len(normal) * (meanc - meang).dot((meanc - meang).T)
    return sb
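
This accumulates the between-class scatter matrix S_B = sum over classes c of n_c * (mean_c - grand_mean) * (mean_c - grand_mean)^T: len(normal) is n_c, the number of samples in class c, and reshape(4, 1) turns each mean into a column vector so the product is an outer product (the hard-coded 4 assumes four features).
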
Example no. 11
0
def grand_mean(ds, column=-1):

    if column > -1:
        classes = Stat.unique(mh.getColumn(ds, len(ds[0]) - 1))

        mean = []

        for c in classes:
            transposta = mh.transposeMatrix(ds.copy())
            transposta = np.array(transposta)
            normal = ds[transposta[len(ds[0]) - 1] == c]
            mean.append([
                Stat.mean(mh.getColumn(normal, i))
                for i in range(0,
                               len(normal[0]) - 1)
            ])

        return mean
    else:
        return [
            Stat.mean(mh.getColumn(ds, i)) for i in range(0,
                                                          len(ds[0]) - 1)
        ]
Example no. 12
0
def mat_similarities(tfidfs):
    """Fabrique la matrice de similarite"""
    # Matrice de similarité
    mat_sim = {}
    # Récupération de la liste des identifiants des livres
    id_books = tfidfs.keys()
    # Création des dictionnaires de chaque livre
    for id_book in id_books:
        mat_sim[id_book] = {}
    # Calcul des similarités-cosinus
    for id_book_i in id_books:
        tfidf_book_i = np.array(tfidfs[id_book_i], dtype=np.float)
        for id_book_j in id_books[id_books.index(id_book_i)+1:]:
            tfidf_book_j = np.array(tfidfs[id_book_j], dtype=np.float)
            cos = st.similarity(tfidf_book_i, tfidf_book_j)
            mat_sim[id_book_i][id_book_j] = cos
            mat_sim[id_book_j][id_book_i] = cos
    return mat_sim
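
The matrix is symmetric, so each cosine is computed once and mirrored into both entries. A minimal sketch of the cosine similarity that st.similarity presumably implements (an assumption; the helper itself is not shown):

import numpy as np

def cosine_similarity(a, b):
    # cos(a, b) = a.b / (|a| |b|); returns 0.0 for a zero vector.
    na, nb = np.linalg.norm(a), np.linalg.norm(b)
    if na == 0 or nb == 0:
        return 0.0
    return float(np.dot(a, b) / (na * nb))
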
Example no. 13
0
def scatter_within_class(ds, class_mean):
    classes = Stat.unique(mh.getColumn(ds, len(ds[0]) - 1))
    sw = np.zeros((len(ds[0]) - 1, len(ds[0]) - 1))

    for i in range(len(classes)):
        si = np.zeros((len(ds[0]) - 1, len(ds[0]) - 1))

        transposta = mh.transposeMatrix(ds.copy())
        transposta = np.array(transposta)
        normal = ds[transposta[len(ds[0]) - 1] == classes[i]]

        for j in range(len(normal)):
            row = normal[j]
            row = row[0:-1].reshape(4, 1)
            mean = np.asarray(class_mean[i]).reshape(4, 1)

            si += (row - mean).dot((row - mean).T)
        sw += si

    return sw
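
Here si is the scatter of class i, S_i = sum over samples x in class i of (x - mean_i) * (x - mean_i)^T, and the within-class scatter is S_W = sum over i of S_i. Together with the between-class scatter from Example no. 10, these are the two matrices whose generalized eigenvectors give a discriminant (LDA-style) projection; as before, reshape(4, 1) assumes four features.
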
Example no. 14
0
def tfidfs():
    """Calcule le TF-IDF de tous les livres de la base"""
    # Récupération de la liste des id_book de la base
    id_books = db.get_id_books()
    # Nombre de livres dans la base
    nb_books = len(id_books)
    # Récupération, pour chaque mot, du nombre de livres où chaque mot apparaît
    occ_in_books = db.dic_idword_nbbooks()
    # Construction d'un dictionnaire des IDF de la base de données
    dic_idf = st.dic_idf(nb_books, occ_in_books)
    # Construction d'un dictionnaire associant son TF-IDF à chaque id_book
    tfidfs = {}
    for id_book in id_books:
        dic_tf = db.dic_tf_book(id_book)
        tfidf = []
        for idword in dic_idf:
            tfidf.append(float(dic_tf.get(idword, 0)) * float(dic_idf[idword]))
        tfidfs[id_book] = tfidf
        # print("TFIDF du livre " + str(id_book) + " calculé")
    return tfidfs
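
Each book's vector is tf * idf over the vocabulary in dic_idf, with a TF of 0 for words absent from the book. A hedged sketch of the standard IDF that st.dic_idf presumably computes (the actual helper is not shown):

import math

def dic_idf(nb_books, occ_in_books):
    # idf(w) = log(N / df(w)), where df(w) is the number of books containing w.
    return {idword: math.log(float(nb_books) / df)
            for idword, df in occ_in_books.items()}
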
Example no. 15
0
    def GenStat(self, fn):
        with Cons.MT(fn, print_time=False):
            thrp = []
            with open(fn) as fo:
                for line in fo.readlines():
                    line = line.rstrip()
                    if len(line) == 0:
                        continue
                    if line.startswith("#"):
                        continue

                    # 0.348919 s, 192 MB/s
                    m = re.match(r"(?P<lap_time>(\d|\.)+) s, .+", line)
                    if m:
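                        # presumably a 64 MiB transfer: 64.0 / elapsed seconds gives MiB/s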
                        thrp.append(64.0 / float(m.group("lap_time")))
                        continue
                    raise RuntimeError("Unexpected %s" % line)
            #Cons.P(len(thrp))
            fn_cdf = "%s/%s-cdf" % (_dn_output, os.path.basename(fn))
            self.fns_cdf.append(fn_cdf)
            Stat.GenStat(thrp, fn_cdf)
Example no. 16
0
    def parse(self, filename):
        players = []
        with open(filename, "rb") as csvfile:
            reader = csv.reader(csvfile, delimiter=",")
            for row in reader:
                players.append([ele for ele in row if ele])

        self.header = players[0]

        for player in players:
            if player[0] == "Player":
                continue
            stats = []
            i = 0
            for ele in player:
                if i >= 28:
                    break
                stat = Stat.Stat(self.header[i], ele)
                stats.append(stat)
                i += 1
            season_player = Player.Player(stats)
            self.season_players.append(season_player)
Example no. 17
0
    def GenStat(self, fn):
        with Cons.MT(fn, print_time=False):
            thrp = []
            fn0 = "%s/result/%s" % (os.path.dirname(__file__), fn)
            with open(fn0) as fo:
                for line in fo.readlines():
                    if line.startswith("1+0 records in"):
                        continue
                    if line.startswith("1+0 records out"):
                        continue
                    if line.startswith("real"):
                        continue
                    if line.startswith("user"):
                        continue
                    if line.startswith("sys"):
                        continue

                    # 134217728 bytes (134 MB) copied, 0.851289 s, 158 MB/s
                    #m = re.match(r"\d+ bytes \(\d+ MB\) copied, (?P<lap_time>(\d|\.)+) s, .+", line)
                    m = re.match(
                        r"134217728 bytes \(134 MB\) copied, (?P<lap_time>(\d|\.)+) s, .+",
                        line)
                    if m:
                        #Cons.P(m.group("lap_time"))
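                        # 134217728 bytes = 128 MiB, so 128.0 / elapsed seconds gives MiB/s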
                        thrp.append(128.0 / float(m.group("lap_time")))
                        continue
                    raise RuntimeError("Unexpected %s" % line)
            #Cons.P(len(thrp))
            Stat.GenStat(thrp, "%s/%s-cdf" % (_dn_stat, fn))

            # Throughput in the time order
            fn_time_order = "%s/%s-time-order" % (_dn_stat, fn)
            with open(fn_time_order, "w") as fo:
                for t in thrp:
                    fo.write("%s\n" % t)
            Cons.P("Created %s %d" %
                   (fn_time_order, os.path.getsize(fn_time_order)))
Example no. 18
0
def GenMemStatByHour(dn_log_job, exp_dt):
    #Cons.P("%s %s" % (dn_log_job, exp_dt))
    fn = "%s/procmon/%s" % (dn_log_job, exp_dt)
    if not os.path.exists(fn):
        fn_zipped = "%s.bz2" % fn
        if not os.path.exists(fn_zipped):
            raise RuntimeError("Unexpected: %s" % fn)
        Util.RunSubp("cd %s && bzip2 -dk %s > /dev/null" %
                     (os.path.dirname(fn_zipped), os.path.basename(fn_zipped)))
    if not os.path.exists(fn):
        raise RuntimeError("Unexpected")

    exp_begin_dt = datetime.datetime.strptime(exp_dt, "%y%m%d-%H%M%S.%f")

    # man proc. statm

    hour_mems = {}
    with open(fn) as fo:
        for line in fo:
            t = line.strip().split()
            dt = datetime.datetime.strptime(t[0], "%y%m%d-%H%M%S")
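            # statm reports sizes in pages; this assumes the common 4 KiB page size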
            rss = int(t[2]) * 4096
            #Cons.P("%s %d" % (dt, rss))

            # Convert to relative time
            rel_dt = dt - exp_begin_dt
            totalSeconds = rel_dt.seconds
            h, remainder = divmod(totalSeconds, 3600)

            if h not in hour_mems:
                hour_mems[h] = []
            hour_mems[h].append(rss)

    hour_memstat = {}
    for h, mems in sorted(hour_mems.iteritems()):
        hour_memstat[h] = Stat.Gen(mems)
    return hour_memstat
Example no. 19
0
def _GetCpuStatByHour(fn_ycsb):
    (fn_dstat, num_stgdevs) = DstatLog.GetPlotFn(fn_ycsb)

    col_time = 17

    col_cpu_idle = 19
    col_cpu_sys = col_cpu_idle + 2
    col_cpu_user = col_cpu_idle + 3
    col_cpu_iowait = col_cpu_idle + 4

    #Cons.P(fn_dstat)
    # Bucketize CPU usage
    #   {hour: [cpu_usage]}
    hour_cpuusage = {}
    with open(fn_dstat) as fo:
        for line in fo:
            if line.startswith("#"):
                continue
            line = line.strip()
            t = re.split(r" +", line)
            time0 = t[col_time - 1]

            cpu = 100.0 - float(t[col_cpu_idle - 1])

            #Cons.P("%s %s" % (time0, cpu))
            hour = int(time0.split(":")[0])
            if hour not in hour_cpuusage:
                hour_cpuusage[hour] = []
            hour_cpuusage[hour].append(cpu)

    hour_cpustat = {}
    for hour, cpu_usage in hour_cpuusage.iteritems():
        r = Stat.Gen(cpu_usage)
        #Cons.P("%d %s" % (hour, r))
        hour_cpustat[hour] = r
    return hour_cpustat
Example no. 20
0
    def calcAnovaWave(self):
        # calculates Anova on wave and/or GFP
        if self.AnovaCheck:

            self.Wave = Stat.Anova(self.H5, self.Mainframe)
            # Parametric Analysis
            if self.AnovaParam:
                if self.AnalyseType in ["GFP Only", "Both"] \
                        and self.doAnovaParamGFP:
                    self.Wave.Param(DataGFP=True)
                    self.progressTxt.append('Parametric Anova (GFP) : %s' %
                                            self.Wave.elapsedTime)


                if self.AnalyseType in ["All Electrodes", "Both"] \
                        and self.doAnovaParamElect:
                    self.Wave.Param()
                    self.progressTxt.append(
                        'Parametric Anova (All Electrodes) : %s' %
                        self.Wave.elapsedTime)

            # Non Parametric Analysis
            else:
                if self.AnalyseType in ["GFP Only", "Both"] \
                        and self.doAnovaNonParamGFP:
                    self.Wave.NonParam(self.AnovaIteration, DataGFP=True)
                    self.progressTxt.append('Non-Parametric Anova (GFP) : %s' %
                                            self.Wave.elapsedTime)

                if self.AnalyseType in ["All Electrodes", "Both"] \
                        and self.doAnovaNonParamElect:
                    self.Wave.NonParam(self.AnovaIteration)
                    self.progressTxt.append(
                        'Non-Parametric Anova (All Electrodes) : %s' %
                        self.Wave.elapsedTime)

            # Makes sure that the h5 files are always closed at the end
            self.cancel = self.Wave.cancel
            self.Wave.file.close()

        # calculates PostHoc on wave and/or GFP
        if self.PostHocCheck:

            self.WavePostHoc = Stat.PostHoc(self.H5, self.Mainframe)

            # Parametric
            if self.PostHocParam:
                if self.AnalyseType in ["GFP Only", "Both"] \
                        and self.doPostHocParamGFP:
                    self.WavePostHoc.Param(DataGFP=True)
                    self.progressTxt.append('Parametric PostHoc (GFP) : %s' %
                                            self.WavePostHoc.elapsedTime)

                if self.AnalyseType in ["All Electrodes", "Both"] \
                        and self.doPostHocParamElect:
                    self.WavePostHoc.Param()
                    self.progressTxt.append(
                        'Parametric PostHoc (All Electrodes) : %s' %
                        self.WavePostHoc.elapsedTime)

            # Non Parametric
            else:
                if self.AnalyseType in ["GFP Only", "Both"] \
                        and self.doPostHocNonParamGFP:
                    self.WavePostHoc.NonParam(self.PostHocIteration,
                                              DataGFP=True)
                    self.progressTxt.append(
                        'Non-Parametric PostHoc (GFP) : %s' %
                        self.WavePostHoc.elapsedTime)

                if self.AnalyseType in ["All Electrodes", "Both"] \
                        and self.doPostHocNonParamElect:
                    self.WavePostHoc.NonParam(self.PostHocIteration)
                    self.progressTxt.append(
                        'Non-Parametric PostHoc (All Electrodes) : %s' %
                        self.WavePostHoc.elapsedTime)
            # Makes sure that the h5 files are always closed at the end
            self.cancel = self.WavePostHoc.cancel
            self.WavePostHoc.file.close()

        # Multiple testing and writing Data
        if self.SpaceFile == '':
            self.SpaceFile = None
        Correction = PostStat.MultipleTestingCorrection(
            self.H5,
            self.Mainframe,
            TF=self.PtsConsec,
            Alpha=self.Alpha,
            SpaceCont=self.Clust,
            SpaceFile=self.SpaceFile)
        Correction.Calculation()
        self.Param = {'Anova': self.AnovaParam, 'PostHoc': self.PostHocParam}
        if self.AnalyseType in ["GFP Only", "Both"]:
            Writing = PostStat.WriteData(self.PathResult,
                                         self.H5,
                                         self.Param,
                                         DataGFP=True)
            Writing.StatistcalData(Correction.CorrectedMask)
            Writing.IntermediateResult()
            Writing.file.close()
        if self.AnalyseType in ["All Electrodes", "Both"]:
            Writing = PostStat.WriteData(self.PathResult,
                                         self.H5,
                                         self.Param,
                                         DataGFP=False)
            Writing.StatistcalData(Correction.CorrectedMask)
            Writing.IntermediateResult()
            Writing.file.close()
Example no. 21
0
predictTrains = []
predictTests = []

dataAccuracy = []
testAccuracy = []

for i in range(5):
    sgd = SGD.SGD(r=best_r,epochs=best_epoch,W0=[0]*len(data[i][0]))
    sgd.fit(data[i],data_labels)
    
    predictTrains.append(sgd.predict(data[i]))
    predictTests.append(sgd.predict(testset[i]))
    
    dataAccuracy.append(Stat.F1_Score(sgd.predict(data[i]), data_labels))
    testAccuracy.append(Stat.F1_Score(sgd.predict(testset[i]), test_labels))

trainT = np.asarray(predictTrains).T.tolist()
testT = np.asarray(predictTests).T.tolist()

predictTrain = []
for i in range(len(data[0])):
    probPos = 0
    probNeg = 0
    for j in range(5):
        if predictTrains[j][i] == 1:
            probPos += dataAccuracy[j]
        else:
            probNeg += dataAccuracy[j]
    if probPos > probNeg:
Example no. 22
0
                            'class', Globals.euclidean)
data = result['aggregated_confusion_matrix']
list_accuracy = list(result[i]['class_stat']['accuracy'] for i in range(10))

print 'Number of instances : ', result['number_instance']
print 'Number of Features : ', len(result['column_list']) - 1
print 'Classes : ', result['list_classes']
print 'Confusion Matrix for the dataset over 10 runs :'
for i in data.keys():
    print i, ' ',
    for j in data.keys():
        print data[i][j], ' ',
    print ''

print 'Accuracy for 10 runs: ', list_accuracy
print 'Mean Accuracy : ', Stat.mean(list_accuracy)
print 'Variance : ', Stat.variance(list_accuracy)
print 'Standard Deviation : ', Stat.standard_deviation(list_accuracy)

plt.xlabel('sepal width in cm')
plt.ylabel('petal width in cm')

x, y, class_column_name = 'sepal width in cm', 'petal width in cm', result[
    'class_column_name']
new_train_list = sorted(result['training_dataset'],
                        key=lambda k: (float(k[y]), float(k[x])),
                        reverse=True)
new_test_list = sorted(result['test_dataset'],
                       key=lambda k: (float(k[y]), float(k[x])),
                       reverse=True)
Example no. 23
0
def main(inF, outF):
    # use inline command holding the file name
    file = inF

    # read in input file separated by sheets
    try:
        dataClasses = pd.read_excel(file,
                                    sheet_name='Schedule')  # reading file
    except Exception:
        raise Exception(
            "Error: There is no table in your classroom file titled 'Schedule'."
        )
    try:
        dataRooms = pd.read_excel(file, sheet_name='Capacity')  # reading file
    except Exception:
        raise Exception(
            "Error: There is no table in your classroom file titled 'Capacity'."
        )
    try:
        dataBuild = pd.read_excel(file, sheet_name='Coords')  # reading file
    except Exception:
        raise Exception(
            "Error: There is no table in your classroom file titled 'Coords'.")

    # convert pandas data frame into raw values
    courses = dataClasses.values
    rooms = dataRooms.values
    buildings = dataBuild.values

    # initialize arrays to hold room and course objects
    courseList = []
    roomList = []
    buildList = []
    subjectTobuilding = {}

    # create schedule object
    spring2020 = Schedule()

    # import time slots into schedule based on days
    for i in dataClasses.Time:
        # if statement to separate slots based on days and to avoid repeats
        if (("mw" in i) or ("MW" in i)) and not (i in spring2020.mw):
            spring2020.mw.append(i.lower())
        if (("tt" in i) or ("TT" in i)) and not (i in spring2020.tt):
            spring2020.tt.append(i.lower())

        if (("MWF" in i) or ("mwf" in i)) and not (i in spring2020.mwf):
            spring2020.mwf.append(i.lower())

    # create courses and add to Course list
    for i in courses:
        # (self, subject, course, title, ver, sec, professor, time, cap):
        courseList.append(
            Course(i[0], str(i[1]), i[2], i[3], i[4], i[5], i[6].lower(),
                   i[7]))

    # convert given time value into to an easier to read and understand format
    for i in courseList:
        if "mw" in i.time:
            if "mwf" in i.time:
                i.days = "Mon/Wed/Fri"
                temp = i.time.split("mwf")
                i.Mtime = convert_Time(temp)
            else:
                i.days = "Mon/Wed"
                temp = i.time.split("mw")
                i.Mtime = convert_Time(temp)
        if "tt" in i.time:
            i.days = "Tues/Thurs"
            temp = i.time.split("tt")
            i.Mtime = convert_Time(temp)

    # create rooms and add to room list
    for i in rooms:
        roomList.append(Room(i[0], i[1]))

    # create building objects and add them to list
    for i in range(len(buildings)):
        buildList.append(
            Building(buildings[i][0], buildings[i][1], buildings[i][2],
                     buildings[i][3]))
        subjectTobuilding[buildList[i].subject] = buildList[i].name

    # warning file for error output during schedule generation
    warningTextFile = "warning.txt"
    fo = open(warningTextFile, "w")
    fo.write("Warnings:\n")
    fo.close()

    # make 5 copies of schedule, courseList, and roomList
    S1 = copy.deepcopy(spring2020)
    S1c = copy.deepcopy(courseList)
    S1r = copy.deepcopy(roomList)
    S2 = copy.deepcopy(spring2020)
    S2c = copy.deepcopy(courseList)
    S2r = copy.deepcopy(roomList)
    S3 = copy.deepcopy(spring2020)
    S3c = copy.deepcopy(courseList)
    S3r = copy.deepcopy(roomList)
    S4 = copy.deepcopy(spring2020)
    S4c = copy.deepcopy(courseList)
    S4r = copy.deepcopy(roomList)
    S5 = copy.deepcopy(spring2020)
    S5c = copy.deepcopy(courseList)
    S5r = copy.deepcopy(roomList)

    # First generated schedule rooms and courses in given order
    generate_schedule(S1, S1c, S1r, buildList, subjectTobuilding)
    # set main schedule to S1
    spring2020 = copy.deepcopy(S1)
    courseList = copy.deepcopy(S1c)

    # organize courseList by capacity
    S2c.sort(key=lambda course: course.cap)

    # second generated schedule: rooms in given order, courseList sorted by cap L -> G
    generate_schedule(S2, S2c, S2r, buildList, subjectTobuilding)
    # if this schedule has fewer unscheduled classes than spring2020, make it the main schedule
    if len(S2.unScheduled) < len(spring2020.unScheduled):
        spring2020 = copy.deepcopy(S2)
        courseList = copy.deepcopy(S2c)

    S3c.sort(key=lambda course: course.cap, reverse=True)
    # third generated schedule: rooms in given order, courseList sorted by cap G -> L
    generate_schedule(S3, S3c, S3r, buildList, subjectTobuilding)
    if len(S3.unScheduled) < len(spring2020.unScheduled):
        spring2020 = copy.deepcopy(S3)
        courseList = copy.deepcopy(S3c)

    # sort room list by capacity
    S4r.sort(key=lambda room: room.cap)

    S4c.sort(key=lambda course: course.cap)
    # fourth generated schedule: rooms sorted by cap L -> G, courseList sorted by cap L -> G
    generate_schedule(S4, S4c, S4r, buildList, subjectTobuilding)
    if len(S4.unScheduled) < len(spring2020.unScheduled):
        spring2020 = copy.deepcopy(S4)
        courseList = copy.deepcopy(S4c)

    S5c.sort(key=lambda course: course.cap, reverse=True)
    S5r.sort(key=lambda room: room.cap)

    # fifth generated schedule: rooms sorted by cap L -> G, courseList sorted by cap G -> L
    generate_schedule(S5, S5c, S5r, buildList, subjectTobuilding)
    if len(S5.unScheduled) < len(spring2020.unScheduled):
        spring2020 = copy.deepcopy(S5)
        courseList = copy.deepcopy(S5c)

    # sort spring 2020 by time in days
    spring2020.solution[0].sort(key=lambda course: course.Mtime.hour)
    spring2020.solution[1].sort(key=lambda course: course.Mtime.hour)
    spring2020.solution[2].sort(key=lambda course: course.Mtime.hour)
    spring2020.solution[3].sort(key=lambda course: course.Mtime.hour)
    spring2020.solution[4].sort(key=lambda course: course.Mtime.hour)

    # write schedule to output file
    generate_output(spring2020, courseList, outF)
    # print_schedule(spring2020)
    Stat.main(outF)
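
The five schedules above differ only in how the course and room lists are sorted before generate_schedule runs. A minimal sketch of the same pick-the-best selection as a loop over sort strategies (names follow the example; this is a hedged restructuring, not the original code):

import copy

strategies = [
    (None, False, False),             # given order
    (lambda c: c.cap, False, False),  # courses by cap, ascending
    (lambda c: c.cap, True, False),   # courses by cap, descending
    (lambda c: c.cap, False, True),   # rooms by cap too
    (lambda c: c.cap, True, True),
]
best_schedule, best_courses = None, None
for course_key, rev, sort_rooms in strategies:
    s = copy.deepcopy(spring2020)
    cl = copy.deepcopy(courseList)
    rl = copy.deepcopy(roomList)
    if course_key is not None:
        cl.sort(key=course_key, reverse=rev)
    if sort_rooms:
        rl.sort(key=lambda room: room.cap)
    generate_schedule(s, cl, rl, buildList, subjectTobuilding)
    if best_schedule is None or len(s.unScheduled) < len(best_schedule.unScheduled):
        best_schedule, best_courses = s, cl
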
Example no. 24
0
    def __init__(self, exp_set_id, stg_dev):
        conf_sd = Conf.Get(exp_set_id)[stg_dev]

        t = conf_sd["jobid_expdt"].split("/")
        job_id = t[0]
        exp_dt = t[1]

        t = conf_sd["time_window"].split("-")
        exp_time_begin = t[0]
        exp_time_end = t[1]

        dn_log = Conf.GetDir("dn")
        dn_log_job = "%s/%s" % (dn_log, job_id)

        self.fn_out = "%s/ycsb-by-time-%s" % (Conf.GetOutDir(), exp_dt)
        if os.path.isfile(self.fn_out):
            return

        self.exp_begin_dt = datetime.datetime.strptime(exp_dt,
                                                       "%y%m%d-%H%M%S.%f")
        #Cons.P(self.exp_begin_dt)

        with Cons.MT("Generating ycsb time-vs-metrics file for plot ..."):
            fn_log_ycsb = "%s/ycsb/%s-d" % (dn_log_job, exp_dt)
            # Unzip when the file is not there
            if not os.path.exists(fn_log_ycsb):
                fn_zipped = "%s.bz2" % fn_log_ycsb
                if not os.path.exists(fn_zipped):
                    raise RuntimeError("Unexpected: %s" % fn_log_ycsb)
                Util.RunSubp(
                    "cd %s && bzip2 -dk %s > /dev/null" %
                    (os.path.dirname(fn_zipped), os.path.basename(fn_zipped)))
            if not os.path.exists(fn_log_ycsb):
                raise RuntimeError("Unexpected")

            mo_list = []
            line_params = None
            line_run = None
            with open(fn_log_ycsb) as fo:
                for line in fo:
                    #Cons.P(line)
                    # 2017-10-13 20:41:01:258 2 sec: 34 operations; 34 current ops/sec; est completion in 68 days 1 hours [READ: Count=28, Max=46943, Min=33,
                    # Avg=32239.54, 90=45343, 99=46943, 99.9=46943, 99.99=46943] [INSERT: Count=8, Max=9343, Min=221, Avg=4660.88, 90=8695, 99=9343, 99.9=9343,
                    # 99.99=9343]
                    mo = re.match(r"\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d:\d\d\d (?P<rel_time>\d+) sec: \d+ operations; " \
                        "(?P<db_iops>(\d|\.)+) current ops\/sec; .*" \
                        "\[READ: Count=(?P<r_cnt>\d+), Max=(?P<r_max>\d+), Min=(?P<r_min>\d+), Avg=(?P<r_avg>(\d|\.)+)," \
                        " 90=(?P<r_90>\d+), 99=(?P<r_99>\d+), 99.9=(?P<r_999>\d+), 99.99=(?P<r_9999>\d+)\] " \
                        "\[INSERT: Count=(?P<w_cnt>\d+), Max=(?P<w_max>\d+), Min=(?P<w_min>\d+), Avg=(?P<w_avg>(\d|\.)+)," \
                        " 90=(?P<w_90>\d+), 99=(?P<w_99>\d+), 99.9=(?P<w_999>\d+), 99.99=(?P<w_9999>\d+)\] " \
                        , line)
                    if mo is not None:
                        total_seconds = int(mo.group("rel_time"))
                        s = total_seconds % 60
                        total_seconds -= s
                        total_mins = total_seconds / 60
                        m = total_mins % 60
                        total_mins -= m
                        h = total_mins / 60
                        rel_time = "%02d:%02d:%02d" % (h, m, s)
                        mo_list.append((rel_time, mo))
                        continue

                    if line.startswith("params = {"):
                        line_params = line
                        continue

                    if line.startswith("run = {"):
                        line_run = line
                        continue

            cnt = 0
            db_iops = []
            r_cnt = 0
            r_avg = 0.0
            r_min = 0
            r_max = 0
            r_90 = 0
            r_99 = 0
            r_999 = 0
            r_9999 = 0
            w_cnt = 0
            w_avg = 0.0
            w_min = 0
            w_max = 0
            w_90 = 0
            w_99 = 0
            w_999 = 0
            w_9999 = 0
            for e in mo_list:
                rel_time = e[0]
                if (exp_time_begin < rel_time) and (rel_time < exp_time_end):
                    mo = e[1]
                    db_iops.append(float(mo.group("db_iops")))
                    r_cnt += int(mo.group("r_cnt"))
                    r_avg += float(mo.group("r_avg"))
                    r_min += int(mo.group("r_min"))
                    r_max += int(mo.group("r_max"))
                    r_90 += int(mo.group("r_90"))
                    r_99 += int(mo.group("r_99"))
                    r_999 += int(mo.group("r_999"))
                    r_9999 += int(mo.group("r_9999"))
                    w_cnt += int(mo.group("w_cnt"))
                    w_avg += float(mo.group("w_avg"))
                    w_min += int(mo.group("w_min"))
                    w_max += int(mo.group("w_max"))
                    w_90 += int(mo.group("w_90"))
                    w_99 += int(mo.group("w_99"))
                    w_999 += int(mo.group("w_999"))
                    w_9999 += int(mo.group("w_9999"))
                    cnt += 1

            db_iops_stat = Stat.Gen(db_iops)

            with open(self.fn_out, "w") as fo_out:
                fo_out.write("# %s" % line_params)
                fo_out.write("# %s" % line_run)
                fo_out.write("\n")
                fo_out.write("# In the time range (%s, %s):\n" %
                             (exp_time_begin, exp_time_end))
                fo_out.write("#   db_iops.avg= %14f\n" % db_iops_stat.avg)
                fo_out.write("#   db_iops.min= %14f\n" % db_iops_stat.min)
                fo_out.write("#   db_iops.max= %14f\n" % db_iops_stat.max)
                fo_out.write("#   db_iops._25= %14f\n" % db_iops_stat._25)
                fo_out.write("#   db_iops._50= %14f\n" % db_iops_stat._50)
                fo_out.write("#   db_iops._75= %14f\n" % db_iops_stat._75)
                fo_out.write("#   r_cnt  = %14f\n" % (float(r_cnt) / cnt))
                fo_out.write("#   r_avg  = %14f\n" % (float(r_avg) / cnt))
                fo_out.write("#   r_min  = %14f\n" % (float(r_min) / cnt))
                fo_out.write("#   r_max  = %14f\n" % (float(r_max) / cnt))
                fo_out.write("#   r_90   = %14f\n" % (float(r_90) / cnt))
                fo_out.write("#   r_99   = %14f\n" % (float(r_99) / cnt))
                fo_out.write("#   r_999  = %14f\n" % (float(r_999) / cnt))
                fo_out.write("#   r_9999 = %14f\n" % (float(r_9999) / cnt))
                fo_out.write("#   w_cnt  = %14f\n" % (float(w_cnt) / cnt))
                fo_out.write("#   w_avg  = %14f\n" % (float(w_avg) / cnt))
                fo_out.write("#   w_min  = %14f\n" % (float(w_min) / cnt))
                fo_out.write("#   w_max  = %14f\n" % (float(w_max) / cnt))
                fo_out.write("#   w_90   = %14f\n" % (float(w_90) / cnt))
                fo_out.write("#   w_99   = %14f\n" % (float(w_99) / cnt))
                fo_out.write("#   w_999  = %14f\n" % (float(w_999) / cnt))
                fo_out.write("#   w_9999 = %14f\n" % (float(w_9999) / cnt))
                fo_out.write("\n")

                fmt = "%8s" \
                    " %9.2f" \
                    " %6d %8.2f %3d %6d" \
                    " %6d %6d %6d %6d" \
                    " %6d %8.2f %3d %6d" \
                    " %6d %6d %6d %6d"
                header = Util.BuildHeader(fmt, "rel_time" \
                      " db_iops" \
                      " read_cnt read_lat_avg read_lat_min read_lat_max" \
                      " read_lat_90p read_lat_99p read_lat_99.9p read_lat_99.99p" \
                      " write_cnt write_lat_avg write_lat_min write_lat_max" \
                      " write_lat_90p write_lat_99p write_lat_99.9p write_lat_99.99p" \
                      )

                i = 0
                for e in mo_list:
                    rel_time = e[0]
                    mo = e[1]
                    if i % 40 == 0:
                        fo_out.write(header + "\n")
                    fo_out.write(
                        (fmt + "\n") %
                        (rel_time, float(mo.group("db_iops")),
                         int(mo.group("r_cnt")), float(mo.group("r_avg")),
                         int(mo.group("r_min")), int(mo.group("r_max")),
                         int(mo.group("r_90")), int(mo.group("r_99")),
                         int(mo.group("r_999")), int(mo.group("r_9999")),
                         int(mo.group("w_cnt")), float(mo.group("w_avg")),
                         int(mo.group("w_min")), int(mo.group("w_max")),
                         int(mo.group("w_90")), int(mo.group("w_99")),
                         int(mo.group("w_999")), int(mo.group("w_9999"))))
                    i += 1
            Cons.P("Created %s %d" %
                   (self.fn_out, os.path.getsize(self.fn_out)))
Example no. 25
0
def stat(filepath):
  info, err = Stat(filepath)
  if err:
    raise OSError(err.Error())
  return StatResult(info)
Example no. 26
0
         for epoch in [20]:#,10,15,25]:
             for g0 in [1.1,1.01,1.001]:
                 tmp = []
           
                 sgd = SGD_SVM.SGDSVM(C=C,ro=ro,epochs=epoch,W0=[0]*len(phiTrain[0]),gamma0=g0)
                 kfold = KFold.KFold(n_splits=5)
           
                 for kf in kfold.split(phiTrain): 
                     train2 = [phiTrain[i] for i in kf[0]]
                     train_label2 = [data_labels[i] for i in kf[0]]
                     test2 = [phiTrain[i] for i in kf[1]]
                     test_label2 = [data_labels[i] for i in kf[1]]
                                   
                     sgd.fit(train2, train_label2)
                     predict_tmp = sgd.predict(test2)
                     tmp.append(Stat.Accuracy(predict_tmp,test_label2))
                   
                 if np.mean(tmp) > best_accuracy:
                     best_accuracy = np.mean(tmp)
                     best_C = C
                     best_epoch = epoch
                     best_g0 = g0
                     best_ro = ro
 
 ###
 
 print("mid cross-validation")
 
 sgd = SGD_SVM.SGDSVM(C=best_C,ro=best_ro,epochs=best_epoch,W0=[0]*len(phiTrain[0]),gamma0=best_g0)
 sgd.fit(phiTrain,train_label)
 
Example no. 27
0
    def __init__(self, fn_in, time_begin, time_end, overloaded):
        self.overloaded = overloaded

        # Unzip when the file is not there
        if not os.path.exists(fn_in):
            fn_zipped = "%s.bz2" % fn_in
            if not os.path.exists(fn_zipped):
                raise RuntimeError("Unexpected: %s" % fn_in)
            Util.RunSubp(
                "cd %s && bzip2 -dk %s > /dev/null" %
                (os.path.dirname(fn_zipped), os.path.basename(fn_zipped)))
        if not os.path.exists(fn_in):
            raise RuntimeError("Unexpected")
        #Cons.P(fn_in)

        mo_list = []
        line_params = None
        line_run = None
        with open(fn_in) as fo:
            for line in fo:
                #Cons.P(line)
                # 2017-10-13 20:41:01:258 2 sec: 34 operations; 34 current ops/sec; est completion in 68 days 1 hours [READ: Count=28, Max=46943, Min=33,
                # Avg=32239.54, 90=45343, 99=46943, 99.9=46943, 99.99=46943] [INSERT: Count=8, Max=9343, Min=221, Avg=4660.88, 90=8695, 99=9343, 99.9=9343,
                # 99.99=9343]
                mo = re.match(r"\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d:\d\d\d (?P<rel_time>\d+) sec: \d+ operations; " \
                    "(?P<db_iops>(\d|\.)+) current ops\/sec; .*" \
                    "\[READ: Count=(?P<r_cnt>\d+), Max=(?P<r_max>\d+), Min=(?P<r_min>\d+), Avg=(?P<r_avg>(\d|\.)+)," \
                    " 90=(?P<r_90>\d+), 99=(?P<r_99>\d+), 99.9=(?P<r_999>\d+), 99.99=(?P<r_9999>\d+)\] " \
                    "\[INSERT: Count=(?P<w_cnt>\d+), Max=(?P<w_max>\d+), Min=(?P<w_min>\d+), Avg=(?P<w_avg>(\d|\.)+)," \
                    " 90=(?P<w_90>\d+), 99=(?P<w_99>\d+), 99.9=(?P<w_999>\d+), 99.99=(?P<w_9999>\d+)\] " \
                    , line)
                if mo is None:
                    continue

                total_seconds = int(mo.group("rel_time"))
                s = total_seconds % 60
                total_seconds -= s
                total_mins = total_seconds / 60
                m = total_mins % 60
                total_mins -= m
                h = total_mins / 60
                rel_time = "%02d:%02d:%02d" % (h, m, s)
                if (time_begin <= rel_time) and (rel_time <= time_end):
                    mo_list.append((rel_time, mo))
        if len(mo_list) == 0:
            raise RuntimeError("Unexpected. Check file [%s]" % fn_in)

        cnt = 0
        db_iops = []
        r_cnt = 0
        r_avg = 0.0
        r_min = 0
        r_max = 0
        r_90 = 0
        r_99 = 0
        r_999 = 0
        r_9999 = 0
        w_cnt = 0
        w_avg = 0.0
        w_min = 0
        w_max = 0
        w_90 = 0
        w_99 = 0
        w_999 = 0
        w_9999 = 0
        for e in mo_list:
            rel_time = e[0]
            mo = e[1]
            db_iops.append(float(mo.group("db_iops")))
            r_cnt += int(mo.group("r_cnt"))
            r_avg += float(mo.group("r_avg"))
            r_min += int(mo.group("r_min"))
            r_max += int(mo.group("r_max"))
            r_90 += int(mo.group("r_90"))
            r_99 += int(mo.group("r_99"))
            r_999 += int(mo.group("r_999"))
            r_9999 += int(mo.group("r_9999"))
            w_cnt += int(mo.group("w_cnt"))
            w_avg += float(mo.group("w_avg"))
            w_min += int(mo.group("w_min"))
            w_max += int(mo.group("w_max"))
            w_90 += int(mo.group("w_90"))
            w_99 += int(mo.group("w_99"))
            w_999 += int(mo.group("w_999"))
            w_9999 += int(mo.group("w_9999"))
            cnt += 1

        self.db_iops_stat = Stat.Gen(db_iops)

        self.r_cnt = r_cnt
        self.r_avg = (float(r_avg) / cnt)
        self.r_min = (float(r_min) / cnt)
        self.r_max = (float(r_max) / cnt)
        self.r_90 = (float(r_90) / cnt)
        self.r_99 = (float(r_99) / cnt)
        self.r_999 = (float(r_999) / cnt)
        self.r_9999 = (float(r_9999) / cnt)
        self.w_cnt = (float(w_cnt) / cnt)
        self.w_avg = (float(w_avg) / cnt)
        self.w_min = (float(w_min) / cnt)
        self.w_max = (float(w_max) / cnt)
        self.w_90 = (float(w_90) / cnt)
        self.w_99 = (float(w_99) / cnt)
        self.w_999 = (float(w_999) / cnt)
        self.w_9999 = (float(w_9999) / cnt)
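
The seconds-to-"HH:MM:SS" arithmetic near the top of this example (also used in Example no. 24: subtract the remainder, divide, repeat) can be written with two divmod calls; an equivalent sketch:

def format_rel_time(total_seconds):
    # Render an elapsed-seconds count as HH:MM:SS.
    m, s = divmod(total_seconds, 60)
    h, m = divmod(m, 60)
    return "%02d:%02d:%02d" % (h, m, s)
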
Example no. 28
0
def KMeans(ds, k):

    number_centroids = k

    x = MatrixHandler.transposeMatrix(ds)

    random_centroids = []
    #For each centroids
    for i in range(0, number_centroids):
        random_sample = randrange(len(x[0]))
        dim = []
        #For each dimension of X
        for j in range(len(x)):
            dim.append(x[j][random_sample])
        random_centroids.append(dim)
    random_centroids = np.asarray(random_centroids)

    #Get clusters
    cluster = get_clusters(random_centroids, k, ds)

    #    t=x[0]
    #    y=x[1]
    #    classes = cluster
    #    unique = list(set(classes))
    #    colors = [plt.cm.jet(float(i)/max(unique)) for i in unique]
    #    for i, u in enumerate(unique):
    #        xi = [t[j] for j  in range(len(t)) if classes[j] == u]
    #        yi = [y[j] for j  in range(len(t)) if classes[j] == u]
    #        plt.scatter(xi, yi, c=colors[i], label=str(u))
    #    plt.legend()
    #    random_centroids = MatrixHandler.transposeMatrix(random_centroids)
    #    plt.scatter(random_centroids[0], random_centroids[1], color='red')
    #    plt.show()

    last_centroids = random_centroids

    is_eq_last = False

    while (not is_eq_last):
        new_centroids = []

        for c in Stat.unique(cluster):
            new_centroids.append(NDMean(ds[np.asarray(cluster) == c]).tolist())
        new_centroids = np.asarray(new_centroids)
        is_eq_last = np.array_equal(new_centroids, last_centroids)

        last_centroids = new_centroids

        cluster = get_clusters(new_centroids, k, ds)

#        t=x[0]
#        y=x[1]
#        classes = cluster
#        unique = list(set(classes))
#        colors = [plt.cm.jet(float(i)/max(unique)) for i in unique]
#        for i, u in enumerate(unique):
#            xi = [t[j] for j  in range(len(t)) if classes[j] == u]
#            yi = [y[j] for j  in range(len(t)) if classes[j] == u]
#            plt.scatter(xi, yi, c=colors[i], label=str(u))
#        plt.legend()
#        new_centroids = MatrixHandler.transposeMatrix(new_centroids)
#        plt.scatter(new_centroids[0], new_centroids[1], color='red')
#        plt.show()

#    print(ds)
#    print(cluster)

    return np.append(ds, np.asarray(cluster).reshape(len(cluster), 1),
                     axis=1), last_centroids
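
The loop stops when an iteration leaves every centroid unchanged (np.array_equal on the old and new centroid arrays), the usual K-means convergence test. A hedged usage sketch, assuming ds is the same numeric dataset array the other examples use:

labeled, centroids = KMeans(ds, 3)  # labeled has the cluster id appended as a final column
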
Example no. 29
0
def checkLandscape(path):
    # test_imagepath = path
    test_hist = returnHistogram(path)
    test_sky = returnSky(path)

    part_t = 0
    iteration = 0


    neurons = []
    neurons_sky = []
    for i in range(neurons_amount):
        neurons.append(Neuron(input_neuron_data, n, 1, np.array([1])))
    for i in range(neurons_amount):
        neurons_sky.append(Neuron(input_neuron_data, n, 1, np.array([1])))

    hist_count_out = Neuron(input_neuron_data, n, neurons_amount, np.ones(neurons_amount))
    sky_count_out = Neuron(input_neuron_data, n, neurons_amount, np.ones(neurons_amount))
    hist_out = Neuron(input_neuron_data, n, 1, np.array([1]))
    sky_out = Neuron(input_neuron_data, n, 1, np.array([1]))

    out_positive = Neuron(input_neuron_data, n, 1, np.array([1]))
    out_negative = Neuron(input_neuron_data, n, 1, np.array([1]))

    isLandscape = False
    stat = Stat(image_list_count)
    stat.neuron_amount = neurons_amount
    stat.StartTimer()
    for t in range(T - 1):

        if t % single_iteration_time == 0 and t < T - end_spike_time:
            part_t = 0
            # model_hist = returnHistogram(image_list[iteration])
            # model_sky = returnSky(image_list[iteration])
            model_hist = hist_data[iteration]
            model_sky = sky_data[iteration]
            # print("iteration: " + str(iteration))
            iteration += 1
        for i in range(0, neurons_amount):
            if test_hist[i] == part_t:
                neurons[i].setCurrent(2, t, 0)
                stat.hist_fired += 1
                stat.hist_fired_tab[iteration] += 1
                #print("hist " + str(t) + " " + str(i))
            neurons[i].calc(t)
        for i in range(0, neurons_amount):
            if test_sky[i] == part_t:
                neurons_sky[i].setCurrent(2, t, 0)
                stat.sky_fired += 1
                stat.sky_fired_tab[iteration] += 1
                #print("sky " + str(t) + " " + str(i))
            neurons_sky[i].calc(t)
        #check fired
        if t > 10 and t < T - end_spike_time:
            part_t_back = part_t - 11
            for j in range(neurons_amount):
                if neurons[j].fired == t and model_hist[j] > part_t_back - hist_count_offset and model_hist[j] < part_t_back + hist_count_offset:
                    hist_count_out.setCurrent(1, t, j)

                    # print("hist fired " + str(iteration) + " " + str(t) + " " + str(j))
            for j in range(neurons_amount):
                if neurons_sky[j].fired == t and model_sky[j] > part_t_back - sky_count_offset and model_sky[j] < part_t_back + sky_count_offset:
                    sky_count_out.setCurrent(1, t, j)

                    # print("sky fired " + str(iteration) + " " + str(t) + " " + str(j))
        hist_count_out.calc(t)
        sky_count_out.calc(t)
        #last neuron
        if part_t > single_iteration_time - 50 and part_t < single_iteration_time - 45 and hist_count_out.u_step[t] > treshold:
            hist_out.setCurrent(1, t, 0)
        else:
            hist_out.setCurrent(0, t, 0)
        hist_out.calc(t)

        if part_t > single_iteration_time - 30 and part_t < single_iteration_time - 25 and sky_count_out.u_step[t] > treshold_sky:
            sky_out.setCurrent(1, t, 0)
        else:
            sky_out.setCurrent(0, t, 0)
        sky_out.calc(t)

        if part_t > single_iteration_time - 10 and part_t < single_iteration_time - 5:
            sky_count_out.u_step[t + 1] = sky_count_out.u[t]
            hist_count_out.u_step[t + 1] = hist_count_out.u[t]

        if t > T - 20 and t < T - 15:
            if T - 19 == t:
                print(" HIST u_step: " + str(hist_out.u_step[t]) + " SKY u_step: " +str(sky_out.u_step[t]))
            if sky_out.u_step[t] > out_sky_treshold and hist_out.u_step[t] > out_treshold:
                # print("last neuron fired")
                stat.isLandscape = True
                out_positive.setCurrent(1, t, 0)
            else:
                # print("last neuron not fired")
                out_negative.setCurrent(1, t, 0)
        else:
            out_positive.setCurrent(0, t, 0)
            out_negative.setCurrent(0, t, 0)
        out_positive.calc(t)
        out_negative.calc(t)

        part_t += 1
    stat.EndTimer()
    return stat
Example no. 30
0
 def addJogo(self, jogo):
   if self.ateConcurso == -1:
     self.setAteConcurso()
   self.workJogos.append(jogo)
   self.histG       = Stat.makeHistogram(self.workJogos)
   self.ateConcurso += 1
Example no. 31
0
 def WriteLat(self):
     if self.w_stat is not None:
         return self.w_stat
     with Cons.MT("Generating write latency stat ..."):
         self.w_stat = Stat.Gen(self.w_raw)
         return self.w_stat
Example no. 32
0
 def ReadLat(self):
     if self.r_stat is not None:
         return self.r_stat
     with Cons.MT("Generating read latency stat ..."):
         self.r_stat = Stat.Gen(self.r_raw)
         return self.r_stat
Example no. 33
0
 def setAteConcurso(self, ateConcurso=-1):
   self.ateConcurso = ateConcurso
   self.workJogos   = self.jogosObj.getJogosAteConcurso(ateConcurso)
   self.histG       = Stat.makeHistogram(self.workJogos)
   self.ateConcurso = len(self.workJogos)
Example no. 34
0
    def getInfo(self):
        if self.scoresPopulated:
            return
        else:
            newResponse = requests.get(self.url)
            newSoup = BeautifulSoup(newResponse.text, 'html.parser')
            table = newSoup.find('table')
            rows = table.find_all('tr')
            results = []
            #Skips first row, which is useless for our purposes.
            for i in range(1, len(rows)):
                table_headers = rows[i].find_all('th')
                if table_headers:
                    if (i == 1):
                        result = [
                            "Rank", "Date", "Game #", "Age", "Team", "", "Opp",
                            "Result"
                        ]
                        for j in range(len(result), len(table_headers)):
                            label = str(table_headers[j])
                            #print(label[16:].find("\""), label)
                            startIndex = label.find("\"") + 1
                            endIndex = label[
                                (startIndex):].find("\"") + startIndex
                            trueLabel = label[startIndex:endIndex]
                            result += [trueLabel]
                        results.append(result)
                    else:
                        for headers in table_headers:
                            results.append([headers.get_text()])

                table_data = rows[i].find_all('td')
                if table_data:
                    results.append([data.get_text() for data in table_data])

                #results = results[1:len(results)]

            ind = 0
            for ind in range(0, len(results)):
                #print(ind, ":", len(results[ind]), ":", results[ind])
                if ind % 2 == 1:
                    results[int(
                        (ind + 2) / 2)] = results[ind] + results[ind + 1]

            results = results[0:int(ind / 2)]
            statNames = results[0]
            self.valueList = []
            for j in range(0, len(statNames)):
                curStat = Stat.getStat(statNames[j])
                self.valueList += [curStat.value]
                statNames[j] = curStat
            for i in range(1, len(results)):
                score = 0
                for j in range(0, len(results[i])):
                    curr = results[i][j]
                    if curr.isdigit() or (len(curr) > 0 and curr[0] == "-"
                                          and curr[1:].isdigit()):
                        score += self.valueList[j] * int(results[i][j])
                self.addScore(i, round(score, 2))

            self.data = results
            self.cleanUp()
            self.getFanTable()
            self.scoresPopulated = True
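
The score computation at the end of getInfo() is just a weighted sum: each
integer cell of a row is multiplied by the weight of its column's stat. A
compact, hypothetical restatement of that inner loop (weights and row stand in
for self.valueList and results[i]):

def row_score(weights, row):
    # Weighted sum over one table row, ignoring non-integer cells,
    # mirroring the scoring loop in getInfo() above.
    score = 0
    for w, cell in zip(weights, row):
        try:
            score += w * int(cell)
        except ValueError:
            continue
    return round(score, 2)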
Esempio n. 35
0
def main():
    path = "/Users/u15672269/stat"
    data_path = "/Users/u15672269/Desktop/For_Kseniya/однородность.xls"
    title = "Отчет о показателях качества тестовых заданий по курсу Информатика 2018-2019 учебного года 1 семестра"
    KO_I = True
    KO_II = True
    correlation = True

    report = Document()
    report.add_heading(title, 0)

    if KO_I or KO_II or correlation:
        dictionary = DataReader.read_dictionary_from_excel(data_path)
        data = DataReader.read_raw_data_from_excel(data_path, dictionary)

        data_KO = []
        keys = []
        # Question composition of each test
        test = {}
        for i in data:
            if i[2] != '':
                question = dictionary[i[0]][0]
                # Register this question variant under its test.
                variants = test.setdefault(question[0], [])
                if question[1] not in variants:
                    variants.append(question[1])

                key = (question, i[1], i[2])
                if key not in keys:
                    count = sum(elem[0] == i[0] and elem[1] == key[1]
                                and elem[2] == key[2] for elem in data)
                    data_KO.append([i[0], i[1], i[2], count, i[4], i[5]])
                    keys.append(key)
        print("ok")

    if KO_I:
        print("KO_I processing started")
        formulation_stat = Stat.get_question_formulation_stat(
            Stat.count_formulation_stat(data_KO, dictionary))
        formulation_homogeneity = {}
        for key, question_stat in formulation_stat.items():
            formulation_homogeneity[key] = Stat.test_formulation_homogeneity(
                question_stat)
        DataPrinter.create_report_KO_I(report, formulation_homogeneity, path)
        print("KO_I processing finished")

    if KO_II:
        print("KO_II processing started")
        distractor_frequency_stat = Stat.get_distractor_frequency_stat(
            data_KO, dictionary)
        distractor_homogeneity = Stat.test_distractor_homogeneity(
            distractor_frequency_stat, 0.05, 0, 100)
        DataPrinter.create_report_KO_II(report, distractor_frequency_stat,
                                        distractor_homogeneity, path)
        print("KO_II processing finished")

    if correlation:
        print("correlation processing started")
        correlation_stat = Stat.get_correlation_matrix(
            test, Stat.group_stat_by_student(data, dictionary))
        DataPrinter.create_report_correlation(report, correlation_stat, path)
        print("correlation processing finished")

    report.save(os.path.join(path, '{}.docx'.format(title)))

    return
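
Stat.test_formulation_homogeneity is only called here, never defined. If, as
the names suggest, it checks whether different formulations of a question
behave the same, a chi-square test of homogeneity is the textbook tool; the
sketch below is an assumption about the idea, not the project's code:

from scipy.stats import chi2_contingency

def test_formulation_homogeneity(question_stat, alpha=0.05):
    # question_stat is assumed to be a contingency table:
    # rows = formulations, columns = answer-outcome counts.
    chi2, p_value, dof, _expected = chi2_contingency(question_stat)
    return {'chi2': chi2, 'p': p_value, 'dof': dof,
            'homogeneous': p_value >= alpha}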
Esempio n. 36
0
    def calcAnovaIS(self):
        """TODO: implement the checks for rerun"""

        # calculates Anova on inverse space (IS)
        if self.AnovaCheck:
            self.IS = Stat.Anova(self.H5, self.Mainframe)
            # Parametric Analysis
            if self.AnovaParam:
                if self.doAnovaParamIS:
                    self.IS.Param()
                    self.progressTxt.append('Parametric Anova (IS) : %s' %
                                            self.IS.elapsedTime)

            # Non Parametric Analysis
            else:
                if self.doAnovaNonParamIS:
                    self.IS.NonParam(self.AnovaIteration)
                    self.progressTxt.append('Non-Parametric Anova (IS) : %s' %
                                            self.IS.elapsedTime)

            # Makes sure that the h5 files are always closed at the end
            self.IS.file.close()
            self.cancel = self.IS.cancel

        # calculates PostHoc on inverse space (IS)
        if self.PostHocCheck:

            self.ISPostHoc = Stat.PostHoc(self.H5, self.Mainframe)

            # Parametric Analysis
            if self.PostHocParam:
                if self.doPostHocParamIS:
                    self.ISPostHoc.Param()
                    self.progressTxt.append('Parametric PostHoc (IS) : %s' %
                                            self.ISPostHoc.elapsedTime)

            # Non Parametric Analysis
            else:
                if self.doPostHocNonParamIS:
                    self.ISPostHoc.NonParam(self.PostHocIteration)
                    self.progressTxt.append(
                        'Non-Parametric PostHoc (IS) : %s' %
                        self.ISPostHoc.elapsedTime)

            # Makes sure that the h5 files are always closed at the end
            self.ISPostHoc.file.close()
            self.cancel = self.ISPostHoc.cancel
        # Multiple-testing correction, then writing the data
        # (post-stat analysis, e.g. mathematical morphology).
        if self.SpaceFile == '':
            self.SpaceFile = None
        Correction = PostStat.MultipleTestingCorrection(
            self.H5,
            self.Mainframe,
            TF=self.PtsConsec,
            Alpha=self.Alpha,
            SpaceCont=self.Clust,
            SpaceFile=self.SpaceFile)
        Correction.Calculation()
        self.Param = {'Anova': self.AnovaParam, 'PostHoc': self.PostHocParam}
        Writing = PostStat.WriteData(self.PathResult,
                                     self.H5,
                                     self.Param,
                                     DataGFP=False)
        Writing.StatistcalData(Correction.CorrectedMask)
        Writing.IntermediateResult()
        Writing.file.close()
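
One caveat about the "always closed" comments above: if Param() or NonParam()
raises, execution never reaches the file.close() calls. Wrapping each analysis
in try/finally makes the guarantee hold; a sketch, assuming the
Stat.Anova/Stat.PostHoc objects expose file.close() and cancel as above:

def run_and_close(analysis, iterations=None):
    # Run the parametric (or non-parametric) analysis, guaranteeing
    # that the underlying h5 file handle is closed even on error.
    try:
        if iterations is None:
            analysis.Param()
        else:
            analysis.NonParam(iterations)
    finally:
        analysis.file.close()
    return analysis.cancel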
Esempio n. 37
0
print("best gamma0:", best_g0)  
'''
###

sgd = SGD_SVM.SGDSVM(C=best_C,
                     ro=best_ro,
                     epochs=best_epoch,
                     W0=[0] * len(data[0]),
                     gamma0=best_g0)
sgd.fit(data, data_labels)

predictTrain = sgd.predict(data)
predictTest = sgd.predict(testset)

print("Accuracy for training set:")
print(Stat.Accuracy(predictTrain, data_labels))

print("F1 score for training set:")
print(Stat.F1_Score(predictTrain, data_labels))

print("Precision for training set:")
print(Stat.Precision(predictTrain, data_labels))

print("Recall for training set:")
print(Stat.Recall(predictTrain, data_labels))

print("Accuracy for test set")
print(Stat.Accuracy(predictTest, test_labels))

print("F1 score for test set")
print(Stat.F1_Score(predictTest, test_labels))
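
The Stat metric helpers used here (Accuracy, Precision, Recall, F1_Score) are
not shown. Under the standard binary-classification definitions they would
look roughly like this (the +1/-1 label convention is an assumption):

def accuracy(pred, truth):
    return sum(p == t for p, t in zip(pred, truth)) / len(truth)

def precision(pred, truth, positive=1):
    tp = sum(p == positive and t == positive for p, t in zip(pred, truth))
    fp = sum(p == positive and t != positive for p, t in zip(pred, truth))
    return tp / (tp + fp) if (tp + fp) else 0.0

def recall(pred, truth, positive=1):
    tp = sum(p == positive and t == positive for p, t in zip(pred, truth))
    fn = sum(p != positive and t == positive for p, t in zip(pred, truth))
    return tp / (tp + fn) if (tp + fn) else 0.0

def f1_score(pred, truth, positive=1):
    p, r = precision(pred, truth, positive), recall(pred, truth, positive)
    return 2 * p * r / (p + r) if (p + r) else 0.0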
Esempio n. 38
0
File: test.py Project: tifoit/knn-1
data = result['aggregated_confusion_matrix']
list_accuracy = list(result[i]['class_stat']['accuracy'] for i in range(10))


print('Number of instances : ', result['number_instance'])
print('Number of Features : ', len(result['column_list']) - 1)
print('Classes : ', result['list_classes'])
print('Confusion Matrix for the dataset over 10 runs :')
for i in data.keys():
    print(i, ' ', end='')
    for j in data.keys():
        print(data[i][j], ' ', end='')
    print('')

print('Accuracy for 10 runs: ', list_accuracy)
print('Mean Accuracy : ', Stat.mean(list_accuracy))
print('Variance : ', Stat.variance(list_accuracy))
print('Standard Deviation : ', Stat.standard_deviation(list_accuracy))
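
# Editorial sketch (not from tifoit/knn-1): Stat.mean, Stat.variance and
# Stat.standard_deviation are assumed to follow the textbook population
# definitions, i.e. roughly:
#
#     def mean(xs): return sum(xs) / len(xs)
#     def variance(xs):
#         m = mean(xs)
#         return sum((x - m) ** 2 for x in xs) / len(xs)
#     def standard_deviation(xs): return variance(xs) ** 0.5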

plt.xlabel('sepal width in cm')
plt.ylabel('petal width in cm')

x, y, class_column_name = 'sepal width in cm', 'petal width in cm', result['class_column_name']
new_train_list = sorted(result['training_dataset'], key=lambda k: (float(k[y]), float(k[x])), reverse=True)
new_test_list = sorted(result['test_dataset'], key=lambda k: (float(k[y]), float(k[x])), reverse=True)

plt.axis([0, 5, 0, 3])

for i in new_train_list:
    if i[class_column_name] == 'Iris-setosa':
        ro, = plt.plot(i[x], i[y], 'ro')