Ejemplo n.º 1
0
def pubmed_search(term, max_count=0, batch_size=10, progress=True):
    """Search PubMed for *term* and download the matching records in batches.

    Parameters
    ----------
    term : str
        Entrez query string.
    max_count : int
        Upper bound on records to download; 0 means "all matches".
    batch_size : int
        Number of records fetched per efetch call.
    progress : bool
        When True, draw a textual progress bar while downloading.

    Returns
    -------
    list
        Parsed PubMed records, one per article.
    """
    search_handle = Entrez.esearch(db="pubmed", term=term, usehistory='y')

    search_results = Entrez.read(search_handle)
    search_handle.close()

    count = int(search_results["Count"])
    if max_count:
        count = min(count, max_count)

    # History-server tokens let efetch page through the same result set.
    webenv = search_results["WebEnv"]
    query_key = search_results["QueryKey"]

    affiliations = []
    data = []
    if progress:
        p_bar = progressBar(0, count, 60, message="Downloading: ")
    else:
        # no-op stand-in so the download loop stays uniform
        p_bar = lambda x: None
    for start in range(0, count, batch_size):
        fetch_handle = Entrez.efetch(db="pubmed", retmode="xml",
                                     retstart=start, retmax=batch_size,
                                     webenv=webenv, query_key=query_key)
        batch = Entrez.read(fetch_handle)
        fetch_handle.close()
        data += batch
        # BUG FIX: scan only the freshly fetched batch.  The original walked
        # the whole accumulated `data` list on every iteration, so records
        # from earlier batches had their affiliation appended once per batch,
        # producing duplicates in `affiliations`.
        for el in batch:
            try:
                affiliations.append(el['MedlineCitation']['Article']['Affiliation'])
            except KeyError:
                pass  # record has no affiliation field
        p_bar(start)
    return data
Ejemplo n.º 2
0
    def only_seasonal(self):
        """Walk the dataframe row by row, feeding each valid reading into
        txt_getter_seasonal grouped by year, and flushing every completed
        year with calculate_seasonal.

        Rows whose 'Date' is not a string (e.g. NaN) are skipped; the row
        immediately after such a gap compares against row i-2 instead of i-1
        to find the previous valid date.
        """
        progress_bar = pb.progressBar(len(self.df),
                                      prefix='Progress:',
                                      suffix='Complete',
                                      length=50)
        if self.console:
            # mirror progress into the GUI console as well as stdout
            console_progress = pb.progressBar(len(self.df),
                                              prefix='Progress:',
                                              suffix='Complete',
                                              length=50,
                                              console=self.console)
        for i in range(len(self.df)):
            if type(self.df['Date'][i]) != type('27/12/1995'):
                # non-string date (missing value): ignore this row
                pass
            elif i >= 1 and type(self.df['Date'][i - 1]) != type('27/12/1995'):
                # previous row was invalid, so the last good date is at i-2
                year = datetime.strftime(
                    datetime.strptime(self.df['Date'][i], '%d/%m/%Y'), '%Y')
                month = datetime.strftime(
                    datetime.strptime(self.df['Date'][i], '%d/%m/%Y'), '%m')
                if year == self.firstyear or year == datetime.strftime(
                        datetime.strptime(self.df['Date'][i - 2], '%d/%m/%Y'),
                        '%Y'):

                    self.txt_getter_seasonal(year, month, i)
                else:
                    # year changed: close out the previous year first
                    self.calculate_seasonal()
                    self.txt_getter_seasonal(year, month, i)
            else:
                year = datetime.strftime(
                    datetime.strptime(self.df['Date'][i], '%d/%m/%Y'), '%Y')
                month = datetime.strftime(
                    datetime.strptime(self.df['Date'][i], '%d/%m/%Y'), '%m')
                if year == self.firstyear or year == datetime.strftime(
                        datetime.strptime(self.df['Date'][i - 1], '%d/%m/%Y'),
                        '%Y'):

                    self.txt_getter_seasonal(year, month, i)
                else:
                    # year changed: close out the previous year first
                    self.calculate_seasonal()
                    self.txt_getter_seasonal(year, month, i)
            if i == len(self.df) - 1:
                # last row: flush the final (possibly partial) year
                self.calculate_seasonal()

            # print(str(i) + ' de ' + str(len(self.df)))
            progress_bar.print_progress_bar(i)
            if self.console:
                console_progress.print_progress_bar(i)
Ejemplo n.º 3
0
def load_resources(pygame_arg):
    """Load every sound, sprite and sprite sequence used by the game,
    driving a textual progress bar from 0 to 100, and return the filled
    resource containers (sounds, single_sprites, sprite_sequences)."""
    prog = progressbar.progressBar()
    prog.update(0)

    # every sound effect used by the game
    soundlist = [
        "laser.wav", "laser2.wav", "laser3.wav", "laser4.wav", "laser5.wav",
        "explosion.wav", "explosion2.wav", "life.wav", "ouch.wav", "loser.wav",
        "shield1.wav", "armor.wav", "plasma1.wav", "plasmagun.wav",
        "noise.wav", "menu.wav", "click.wav"
    ]

    # sounds account for the first 40% of the progress bar
    sound_total = len(soundlist)
    for position, sound_name in enumerate(soundlist):
        add_sound(sound_name)
        prog.update(position * 40 / sound_total)

    sprite_load_list = [
        "sprite_ship.png", "sprite_ship_fire.png", "sprite_ship_weapon2.png",
        "sprite_laser.png", "sprite_laser_blue.png", "sprite_enemy.png",
        "sprite_enemy_fire.png", "background.png", "backgroundtransp.png",
        "asteroid1.png", "asteroid2.png", "asteroid3.png", "planet1.png",
        "planet2.png", "planet3.png", "lifebonus.png", "armorbonus.png",
        "lifeBonusRing.png", "armorBonusRing.png", "lifemask.png", "ball1.png",
        "sprite_laser_blue_light.png", "sprite_laser_light.png",
        "ball1_light.png", "lifeBonusLight.png", "menu_micshooter.png",
        "menu_options.png", "menu_optionsblurry.png", "menu_play.png",
        "menu_playblurry.png", "menu_resume.png", "menu_resumeblurry.png",
        "menu_quit.png", "menu_quitblurry.png", "menu_sound.png",
        "menu_on.png", "menu_off.png", "menu_resolution.png",
        "menu_800600.png", "menu_800500.png", "sprite_enemy2.png",
        "plasmaBonusRing.png", "plasmabonus.png", "boss1.png", "particle1.png",
        "particle2.png", "particle3.png", "particle4.png", "barArmor.png",
        "barLife.png", "johnson.png", "smoke.png",
        "sprite_ship_shooting_plasma.png", "glow_plasma_shooting.png",
        "sprite_ship_shooting_laser.png", "glow_laser_shooting.png"
    ]

    # sprites fill the remaining 60% (the bar runs from 40 up to 100)
    sprite_total = len(sprite_load_list)
    for position, sprite_name in enumerate(sprite_load_list):
        add_sprite(sprite_name)
        prog.update((position * 60 / sprite_total) + 40)

    pygame.display.set_icon(single_sprites["sprite_ship.png"])

    # multi-frame animations (sheet name, frame width, frame height)
    add_sprite_sequence("sprite_explosion_list.png", 204, 256)
    add_sprite_sequence("sprite_explosion_list_asteroid.png", 192, 192)
    add_sprite_sequence("ship_hurt.png", 192 / 2, 192 / 2)
    prog.update(100)
    return (sounds, single_sprites, sprite_sequences)
Ejemplo n.º 4
0
def load_resources(pygame_arg):
    """Populate the global sound/sprite stores while reporting progress
    from 0 to 100, then return (sounds, single_sprites, sprite_sequences)."""
    prog = progressbar.progressBar()
    prog.update(0)

    soundlist = ["laser.wav", "laser2.wav", "laser3.wav", "laser4.wav", "laser5.wav",
                 "explosion.wav", "explosion2.wav", "life.wav", "ouch.wav", "loser.wav", "shield1.wav",
                 "armor.wav", "plasma1.wav", "plasmagun.wav", "noise.wav", "menu.wav", "click.wav"]

    # sound loading covers the first 40% of the bar
    pos = 0
    while pos < len(soundlist):
        add_sound(soundlist[pos])
        prog.update(pos * 40 / len(soundlist))
        pos += 1

    sprite_load_list = ["sprite_ship.png", "sprite_ship_fire.png",  "sprite_ship_weapon2.png",
                        "sprite_laser.png", "sprite_laser_blue.png", "sprite_enemy.png",
                        "sprite_enemy_fire.png", "background.png", "backgroundtransp.png", "asteroid1.png",
                        "asteroid2.png", "asteroid3.png", "planet1.png", "planet2.png", "planet3.png",
                        "lifebonus.png", "armorbonus.png", "lifeBonusRing.png", "armorBonusRing.png",
                        "lifemask.png", "ball1.png", "sprite_laser_blue_light.png", "sprite_laser_light.png",
                        "ball1_light.png", "lifeBonusLight.png", "menu_micshooter.png", "menu_options.png",
                        "menu_optionsblurry.png", "menu_play.png", "menu_playblurry.png", "menu_resume.png",
                        "menu_resumeblurry.png",  "menu_quit.png", "menu_quitblurry.png", "menu_sound.png",
                        "menu_on.png", "menu_off.png", "menu_resolution.png", "menu_800600.png", "menu_800500.png",
                        "sprite_enemy2.png", "plasmaBonusRing.png", "plasmabonus.png", "boss1.png",
                        "particle1.png", "particle2.png", "particle3.png", "particle4.png", "barArmor.png", "barLife.png",
                        "johnson.png", "smoke.png", "sprite_ship_shooting_plasma.png", "glow_plasma_shooting.png",
                        "sprite_ship_shooting_laser.png", "glow_laser_shooting.png"]

    # sprite loading fills the remaining 60% (bar goes from 40 to 100)
    pos = 0
    while pos < len(sprite_load_list):
        add_sprite(sprite_load_list[pos])
        prog.update((pos * 60 / len(sprite_load_list)) + 40)
        pos += 1

    pygame.display.set_icon(single_sprites["sprite_ship.png"])

    # animated sprite sheets (name, frame width, frame height)
    add_sprite_sequence("sprite_explosion_list.png", 204, 256)
    add_sprite_sequence("sprite_explosion_list_asteroid.png", 192, 192)
    add_sprite_sequence("ship_hurt.png", 192 / 2, 192 / 2)
    prog.update(100)
    return (sounds, single_sprites, sprite_sequences)
Ejemplo n.º 5
0
def historic_to_df(historic, year, start_month='05', end_month='12'):
    """Read a tab-separated historic data file into a DataFrame indexed by
    timestamp and return the slice between *start_month* and *end_month*.

    Parameters
    ----------
    historic : str or file-like
        Path to the tab-separated file with 'Date' and 'Time' columns
        in '%d/%m/%Y' / '%H:%M:%S' format.
    year : str
        Four-digit year the window starts in.
    start_month, end_month : str
        Two-digit month bounds; an end month of '01' rolls over to the
        next year.

    Returns
    -------
    tuple
        (interpolated slice of the DataFrame, global min, global max).
    """
    start_time = year + '-' + start_month + '-01 00:00:00'
    if end_month == '01':
        # January end rolls over into the following year
        end_time = str(int(year) + 1) + '-' + end_month + '-01 00:00:00'
    else:
        end_time = year + '-' + end_month + '-01 00:00:00'

    df = pd.read_csv(historic, sep='\t')
    print('Historic to df:\n')
    progress_bar = pb.progressBar(len(df['Date']), prefix='Progress:', suffix='Complete', length=50)
    # combine date and time into a single sortable timestamp column
    df['added'] = df['Date'] + ' ' + df['Time']
    for i in range(0, len(df['Date'])):
        if str(df['added'][i]) != 'nan':
            df.at[i, 'added'] = datetime.strftime(
                datetime.strptime(str(df['added'][i]), '%d/%m/%Y %H:%M:%S'),
                '%Y-%m-%d %H:%M:%S')
        progress_bar.print_progress_bar(i)
    df.set_index('added', inplace=True)
    df.index.name = None
    del df['Date']
    del df['Time']

    def _snap_to_index(target):
        # Return the index timestamp closest to *target*.  The original code
        # duplicated this whole computation (and the subsequent slice) in two
        # nearly identical branches.
        valid = [datetime.strptime(str(i), '%Y-%m-%d %H:%M:%S')
                 for i in df.index if str(i) != 'nan']
        wanted = datetime.strptime(target, '%Y-%m-%d %H:%M:%S')
        nearest = min(valid, key=lambda x: abs(x - wanted))
        return datetime.strftime(nearest, '%Y-%m-%d %H:%M:%S')

    if start_time not in df.index:
        start_time = _snap_to_index(start_time)
    if end_time not in df.index:
        end_time = _snap_to_index(end_time)
    # label-based slice: both endpoints are inclusive
    filtered_df = df[start_time: end_time]
    if filtered_df.columns[0] == '5':
        # duplicate the shallowest depth column as a '0 m' surface column
        filtered_df.insert(0, '0', filtered_df['5'], allow_duplicates=True)

    return filtered_df.interpolate(axis=1), np.nanmin(df.values), np.nanmax(df.values)
Ejemplo n.º 6
0
    def findStackElement(self,
                         numCores=4,
                         parallelize=True,
                         numCalc=100,
                         stackElement=[],
                         goals=[],
                         sweep=[],
                         chunksize=1):
        """Sweep stack-element configurations and return those meeting the
        optimization goals.

        Builds the Cartesian product of ``sweep['range']`` and
        ``sweep['count']``, runs an error study per configuration (in
        parallel via multiprocessing when requested), and collects every
        configuration that satisfies all goals.

        Returns a dict with keys 'stackElement' (winning configurations)
        and 'result' (their study results).

        NOTE(review): the mutable default arguments ([]) are never mutated
        here, but the default sweep=[] always fails the dict type check —
        callers must pass a sweep dict.
        """
        if len(goals) == 0:
            raise ValueError(
                "You have to define goals for the stack optimization.")
        # bitwise | is used as logical "or" here; both operands are bools
        if (not type(goals) is list) | (not type(goals[0]) is dict):
            raise TypeError(
                "Wrong data type given for input parameter goals. Expected list of dicts."
            )
        if (len(stackElement) == 0) & (len(self.stackElement) == 0):
            raise ValueError("No stack element given for optimization.")
        elif len(stackElement) == 0:
            stackElement = self.stackElement
        if not type(sweep) is dict:
            raise ValueError("Expected dict for sweep definition.")

        print("Generating test configurations...")

        # collect all sweep axes plus the element count as the last axis
        tmp = []
        for r in sweep['range']:
            tmp.append(r)
        r = sweep['count']
        tmp.append(r)

        # every combination of layer thicknesses and element count
        sweepSpace = list(itertools.product(*tmp))
        stackSpace = []

        c = 0
        for config in sweepSpace:
            foo = np.array(list(config))
            elem = self.defineStackElement(material=stackElement['material'],
                                           thickness=foo[np.array(
                                               sweep['layer'])],
                                           spread=[0.1E-3 / 3, 0.1E-3 / 3],
                                           useError=[True, True],
                                           absSpread=[True, True])
            stack = self.defineStack(elem=[elem],
                                     qty=[int(foo[-1])],
                                     label=str(c))
            stackSpace.append(stack.copy())
            c += 1

        print(str(len(stackSpace)) + " configurations generated.")
        print(
            str(len(stackSpace) * len(goals) * numCalc) +
            " calculations in total.")

        avail_cores = multiprocessing.cpu_count()
        print(str(avail_cores) + " cores available.")
        if parallelize:
            # clamp the requested core count to what the machine offers
            if numCores > avail_cores:
                print("Warning: demanded " + str(numCores) +
                      " cores but only " + str(avail_cores) +
                      " cores available.")
                numCores = avail_cores
        else:
            numCores = 1

        print("Using " + str(numCores) + " core(s).")

        #calculate frequency response for all given configurations
        finalists = []
        finalistsResult = []
        configResults = []
        freqs = [elem['f'] for elem in goals]

        pb = progressBar(total=len(stackSpace))
        i = 0
        total = len(stackSpace)

        if parallelize:
            t1 = time.time()
            job_args = []
            #prepare job arguments
            for config in stackSpace:
                job_args.append((freqs, config, goals, numCalc))
            with multiprocessing.Pool(processes=numCores) as pool:
                poolResult = pool.map_async(
                    self.errorStudyWithConstraints_helper, job_args, chunksize)
                # Poll pool progress once per second.  NOTE(review): this
                # reads Pool's private _cache attributes, which is fragile
                # across Python versions.
                while (True):
                    if (poolResult.ready()): break
                    tasksLeft = pool._cache[list(
                        pool._cache.keys())[0]]._number_left
                    chunkSize = pool._cache[list(
                        pool._cache.keys())[0]]._chunksize
                    pb.update(total - chunkSize * tasksLeft)
                    time.sleep(1)
                pool.close()
                pool.join()
            pb.finish(timeElapsed=True)

            # NOTE(review): private-attribute access; poolResult.get() would
            # be the public equivalent.
            poolResult = poolResult.__dict__['_value']

            #check if all goals met = candidate found
            for elem in poolResult:
                if len(elem) == 2:
                    finalists.append(elem[0])
                    finalistsResult.append(elem[1])

            # NOTE(review): pb.finish is called a second time here — looks
            # redundant with the call above; confirm intended.
            pb.finish(timeElapsed=True)
        else:
            t1 = time.time()
            for config in stackSpace:
                #calculate frequency response for POI and check goals
                goalResult = []
                configResult = []
                #frequency response is only calculated for points of interest since otherwise it would consume too much time
                studyResult = self.errorStudyWithConstraintsMinimal(
                    freqs, config, goals, numCalc)

                if len(studyResult) == 2:
                    finalists.append(studyResult[0])
                    finalistsResult.append(studyResult[1])

                i += 1
                pb.update(i)

        timeElapsed = time.time() - t1
        totalCalculations = len(stackSpace) * len(freqs) * numCalc

        print("")
        print(
            str(len(finalists)) + " candidates found with given constraints.")
        print("Total time elapsed:" + str(timeElapsed) + " seconds.")
        print("Configurations tested:" + str(len(stackSpace)))
        print("Total number of calculations:" + str(totalCalculations))
        print(
            str(np.round(totalCalculations / timeElapsed, 2)) +
            " calculations per second")

        return {'stackElement': finalists, 'result': finalistsResult}
Ejemplo n.º 7
0
from progressbar import progressBar
import sys

# Render the progress value saved in progress.txt as a bar sized by argv[1].
if len(sys.argv) == 1:
    print("usage: progress.py MAXSIZE")
else:
    # BUG FIX: the original opened progress.txt before validating argv and
    # closed it manually; use a context manager and open only when needed.
    with open("progress.txt", "r+") as f:
        line = f.read()
    prog = progressBar(maxValue=int(sys.argv[1]))
    prog.updateAmount(int(line))
    prog.draw()
    print("")
def generateScrambledLinearAlignmentV2(args):
    """Generate scrambled linear alignments from XML alignment files.

    Python 2 code.  Reads a corpus and one lexicon per stream, then for
    every recording in the XML alignments enumerates the pronunciation
    variants of each word, builds linear alignments per stream, scrambles
    the per-frame labels round-robin across the variants, and writes the
    result to ``args.output``.

    NOTE(review): ``outFileHandle`` is never closed in this function, and
    the second progress bar silently replaces the first.
    """
    # need to encode utf8 (Python 2 only)
    reload(sys)
    sys.setdefaultencoding('utf-8')
    print args.verbose
    # read the corpus
    corpusfile = args.corpus
    parser = make_parser()
    corpus = CorpusHandler(encoding="UTF-8", includedir=corpusfile)
    parser.setContentHandler(corpus)
    parser.parse(corpusfile)

    corpus_name = corpus.name

    # merge the alignment content of every XML alignment file
    xmlaligns = {}
    for xmlpath in args.xmlalignments.split(','):
        xmlaligns.update(copy.deepcopy(readAlignXML(xmlpath).content))

    orths2class = []
    orths2phones = []  # use dictionary for caching
    labels_of_si = []
    siPhones = []

    # now get the class labels using lexiconahndler: input every entry from actualOrth and state number
    lexParsers = []
    lexicons = []
    for index, lex in enumerate(args.lexiconfile.split(',')):
        lexParsers.append(make_parser())
        lexicons.append(LexiconHandler(int(args.states.split(',')[index])))
        lexParsers[index].setContentHandler(lexicons[index])
        assert (args.lexiconfile)
        lexParsers[index].parse(lex)
        # silence phone and its class label, needed for padding alignments
        siPhones.append(lexicons[index].getLemmaByOrth("[SILENCE]").phon[0])
        labels_of_si.append(
            str(lexicons[index].getMixturesByPhon(siPhones[index])[0]))

    numStreams = len(lexicons)

    convert_orth = Convert_Orth(numStreams, lexicons, siPhones)
    linear_aligner = Linear_Aligner()

    if args.verbose:
        print "number of files to processed: ", len(corpus.recordings)
    else:
        progBar = progressbar.progressBar(0, len(corpus.recordings), 77)
        FileCounter = 0
        progBar.draw()

    if args.maximum_pronunciation_number_to_consider is None:
        # effectively unlimited pronunciation variants
        maximum_pronunciation_number_to_consider = 999999
    else:
        maximum_pronunciation_number_to_consider = args.maximum_pronunciation_number_to_consider

    outFileHandle = open(args.output, "wb")

    # second progress bar, sized on the XML alignments actually iterated below
    if args.verbose:
        print "number of files to processed: ", len(xmlaligns)
    else:
        progBar = progressbar.progressBar(0, len(xmlaligns), 77)
        FileCounter = 0
        progBar.draw()

    for rec in xmlaligns:
        (image_path, imageList,
         imageListLen) = getNumberOfImages(corpus_name, rec)

        if args.verbose:
            print "current processed segment is " + rec
        else:
            FileCounter += 1
            progBar.updateAmount(FileCounter)
            progBar.draw()

        index = args.scrambleStream
        num_words = len(xmlaligns[rec])
        # verbose-only pass over the words of this recording
        for align in xmlaligns[rec]:
            orth = align[0].split("/")[0].split(" ")[0]
            #print align

            if args.verbose is True:
                print ""
                print "segment", rec, "orth", orth
                for i in xrange(numStreams):
                    print "lexicon(" + str(i) + ")", convert_orth.get_phones(
                        i, orth)  #,"   stateList("+str(i)+")", stateList[i]

            #(image_path, imageList, imageListLen) =  getNumberOfImages(corpus.name, rec.name)
            #if args.ignoreMissingImages and imageListLen is None:
            #    continue
            #elif imageListLen is None:
            #    print "error, directory", rec.name,"does not exist"
            #    sys.exit()
        final_alignments = []
        for i in xrange(numStreams):
            #num_words = len(rec.orth)
            # per-word cursor over pronunciation variants
            pron_index = [0] * num_words
            all_prons_done = False
            prons_done = [False] * num_words
            alignments = []
            linear_aligner.change_sil_padding(0, 0, 0, 0)

            # enumerate pronunciation-variant combinations until every word
            # has cycled through all of its (capped) variants
            while not all_prons_done:
                expandedStateList = []

                alignments.append([])
                for word_index, align in enumerate(xmlaligns[rec]):
                    expandedStateList = []
                    currentOrth = align[0].split("/")[0].split(" ")[0]
                    # alignment span boundaries are inclusive
                    alignmentLen = align[2] + 1 - align[1]

                    expandedStateList.append(
                        copy.deepcopy([
                            word for word in convert_orth.get_class_ids(
                                i, currentOrth)
                        ][pron_index[word_index]]))
                    alignments[-1].extend(
                        copy.deepcopy(
                            linear_aligner.get_align(alignmentLen,
                                                     expandedStateList,
                                                     labels_of_si[i])))

                #    print "tmp",word_index," stream",i, currentOrth ,expandedStateList, "aligned", alignments[-1]
                #print "stream",i,expandedStateList
                all_prons_done = True

                # advance the variant cursor of every word (odometer style)
                for word_index, align in enumerate(xmlaligns[rec]):
                    currentOrth = align[0].split("/")[0].split(" ")[0]
                    pron_index[word_index] += 1
                    if pron_index[word_index] < len(
                            convert_orth.get_class_ids(i, currentOrth)
                    ) and pron_index[
                            word_index] < maximum_pronunciation_number_to_consider:
                        if prons_done[word_index] is False:
                            all_prons_done = False
                    else:
                        prons_done[word_index] = True
                        pron_index[word_index] = 0

                #print expandedStateList
                    # NOTE(review): this extend runs inside the cursor loop,
                    # re-appending the last word's expandedStateList per word
                    # — confirm this is intentional.
                    alignmentLen = align[2] + 1 - align[1]
                    linear_aligner.change_sil_padding(0, 0, 0, 0)
                    alignments[-1].extend(
                        copy.deepcopy(
                            linear_aligner.get_align(alignmentLen,
                                                     expandedStateList,
                                                     labels_of_si[i])))
                alignments[-1].extend(labels_of_si[i])
            #scramble all possible alignments
            # round-robin over the alignment variants, frame by frame
            num_alignments = len(alignments)
            align_index = 0
            for n in xrange(imageListLen):
                if n >= len(final_alignments):
                    final_alignments.append([])
                final_alignments[n].append(alignments[align_index][n])
                align_index += 1
                if align_index >= num_alignments:
                    align_index = 0
        write_to_file(outFileHandle, final_alignments, image_path, imageList)
        if not args.verbose:
            FileCounter += 1
            progBar.updateAmount(FileCounter)
            progBar.draw()
def generateLinearAlignedClassLabel(args):
    """Generate linearly aligned class labels for every corpus recording.

    Python 2 code.  Parses a corpus and one lexicon per stream, then for
    each recording enumerates the pronunciation variants of its words,
    builds one linear alignment per variant combination, scrambles them
    round-robin frame by frame, and writes the result to ``args.output``.
    """
    # need to encode utf8 (Python 2 only)
    reload(sys)
    sys.setdefaultencoding('utf-8')

    # read the corpus
    corpusfile = args.corpus
    parser = make_parser()
    corpus = CorpusHandler(encoding="UTF-8", includedir=corpusfile)
    parser.setContentHandler(corpus)
    parser.parse(corpusfile)

    labels_of_si = []
    siPhones = []

    # now get the class labels using lexiconahndler: input every entry from actualOrth and state number
    lexParsers = []
    lexicons = []
    for index, lex in enumerate(args.lexiconfile.split(',')):
        lexParsers.append(make_parser())
        lexicons.append(LexiconHandler(int(args.states.split(',')[index])))
        lexParsers[index].setContentHandler(lexicons[index])
        assert (args.lexiconfile)
        lexParsers[index].parse(lex)
        # silence phone and its class label, needed to pad alignments
        siPhones.append(lexicons[index].getLemmaByOrth("[SILENCE]").phon[0])
        labels_of_si.append(
            str(lexicons[index].getMixturesByPhon(siPhones[index])[0]))

    numStreams = len(lexicons)

    convert_orth = Convert_Orth(numStreams, lexicons, siPhones)
    linear_aligner = Linear_Aligner()

    if args.verbose:
        print "number of files to processed: ", len(corpus.recordings)
    else:
        progBar = progressbar.progressBar(0, len(corpus.recordings), 77)
        FileCounter = 0
        progBar.draw()

    if args.maximum_pronunciation_number_to_consider is None:
        # effectively unlimited pronunciation variants
        maximum_pronunciation_number_to_consider = 999999
    else:
        maximum_pronunciation_number_to_consider = args.maximum_pronunciation_number_to_consider

    outFileHandle = open(args.output, "wb")
    for rec in corpus.recordings:
        if args.verbose:
            print ""
            print "segment", rec.name, "orth", rec.orth
            for i in xrange(numStreams):
                print "lexicon(" + str(i) + ")", [
                    convert_orth.get_phones(i, o) for o in rec.orth
                ]  #,"   stateList("+str(i)+")", stateList[i]
        (image_path, imageList,
         imageListLen) = getNumberOfImages(corpus.name, rec.name)
        # skip (or abort on) recordings whose image directory is missing
        if args.ignoreMissingImages and imageListLen is None:
            continue
        elif imageListLen is None:
            print "error, directory", rec.name, "does not exist"
            sys.exit()
        final_alignments = []
        for i in xrange(numStreams):
            num_words = len(rec.orth)
            # per-word cursor over pronunciation variants
            pron_index = [0] * num_words
            all_prons_done = False
            prons_done = [False] * num_words
            alignments = []
            # enumerate pronunciation-variant combinations until every word
            # has cycled through all of its (capped) variants
            while not all_prons_done:
                expandedStateList = []

                for word_index, currentOrth in enumerate(rec.orth):
                    expandedStateList.append(
                        copy.deepcopy([
                            word for word in convert_orth.get_class_ids(
                                i, currentOrth)
                        ][pron_index[word_index]]))

                all_prons_done = True
                # advance the variant cursor of every word (odometer style)
                for word_index, currentOrth in enumerate(rec.orth):
                    pron_index[word_index] += 1
                    if pron_index[word_index] < len(
                            convert_orth.get_class_ids(i, currentOrth)
                    ) and pron_index[
                            word_index] < maximum_pronunciation_number_to_consider:
                        if prons_done[word_index] is False:
                            all_prons_done = False
                    else:
                        prons_done[word_index] = True
                        pron_index[word_index] = 0

                #print expandedStateList
                alignments.append(
                    copy.deepcopy(
                        linear_aligner.get_align(imageListLen,
                                                 expandedStateList,
                                                 labels_of_si[i])))

            #scramble all possible alignments
            # round-robin over the alignment variants, frame by frame
            num_alignments = len(alignments)
            align_index = 0
            for n in xrange(imageListLen):
                if n >= len(final_alignments):
                    final_alignments.append([])
                final_alignments[n].append(alignments[align_index][n])
                align_index += 1
                if align_index >= num_alignments:
                    align_index = 0
        write_to_file(outFileHandle, final_alignments, image_path, imageList)
        if not args.verbose:
            # NOTE(review): in verbose mode FileCounter/progBar are never
            # created; this relies on args.verbose staying constant.
            FileCounter += 1
            progBar.updateAmount(FileCounter)
            progBar.draw()

        #print final_alignments

    outFileHandle.close()
Ejemplo n.º 10
0
import time
import progressbar

# Demo: wrapping an iterable in a progressBar makes iteration drive the bar.
bar = progressbar.progressBar()

for i in bar(range(100)):
    time.sleep(0.02)  # simulate work so the bar animates visibly
Ejemplo n.º 11
0
    def multiyear_mean_calculator(self):
        """Accumulate monthly readings across all years and build
        self.monthlymeandf with the multiyear mean per (month, depth),
        used for the annual temperature-cycles plot.

        Rows with a non-string 'Date' (missing values) are skipped; the row
        after such a gap compares against i-2 instead of i-1.
        """
        progress_bar = pb.progressBar(len(self.df),
                                      prefix='Progress:',
                                      suffix='Complete',
                                      length=50)
        if self.console:
            # mirror progress into the GUI console as well as stdout
            console_progress = pb.progressBar(len(self.df),
                                              prefix='Progress:',
                                              suffix='Complete',
                                              length=50,
                                              console=self.console)
        # Calculates the multiyear mean for the annual t-cycles plot
        for i in range(len(self.df)):
            if type(self.df['Date'][i]) != type('27/12/1995'):
                # non-string date (missing value): ignore this row
                pass
            elif i >= 1 and type(self.df['Date'][i - 1]) != type('27/12/1995'):
                # previous row was invalid, so the last good date is at i-2
                year = datetime.strftime(
                    datetime.strptime(self.df['Date'][i], '%d/%m/%Y'), '%Y')
                month = datetime.strftime(
                    datetime.strptime(self.df['Date'][i], '%d/%m/%Y'), '%m')

                if month == self.firstmonth or month == datetime.strftime(
                        datetime.strptime(self.df['Date'][i - 2], '%d/%m/%Y'),
                        '%m'):

                    self.monthly_getter(year, month, i)

                else:
                    # month changed: flush the previous month's accumulator
                    self.firstmonth = '00'
                    self.monthly_setter()
                    self.monthly_getter(year, month, i)
            else:
                year = datetime.strftime(
                    datetime.strptime(self.df['Date'][i], '%d/%m/%Y'), '%Y')
                month = datetime.strftime(
                    datetime.strptime(self.df['Date'][i], '%d/%m/%Y'), '%m')

                if month == self.firstmonth or month == datetime.strftime(
                        datetime.strptime(self.df['Date'][i - 1], '%d/%m/%Y'),
                        '%m'):
                    self.monthly_getter(year, month, i)
                else:
                    # month changed: flush the previous month's accumulator
                    self.firstmonth = '00'
                    self.monthly_setter()
                    self.monthly_getter(year, month, i)
            if i == len(self.df) - 1:
                # last row: flush the final (possibly partial) month
                self.monthly_setter()

            # print(str(i) + ' de ' + str(len(self.df)))
            progress_bar.print_progress_bar(i)
            if self.console:
                console_progress.print_progress_bar(i)

        # aggregate: one mean per (month, depth) across all years
        self.monthlymeandf = pd.DataFrame(columns=['month', 'depth', 'mean'])
        monthlydict = {'month': 0, 'depth': 0, 'mean': 0}
        # zeros mark missing readings; drop them from the mean
        self.mydf2.replace(0, np.nan, inplace=True)
        for month in self.mydf2['month'].unique():
            for depth in self.mydf2['depth(m)'].unique():
                monthlydict['month'] = month
                monthlydict['depth'] = depth
                monthlydict['mean'] = np.nanmean(
                    self.mydf2.loc[(self.mydf2['month'] == month) &
                                   (self.mydf2['depth(m)'] == depth), 'mean'])
                # NOTE(review): DataFrame.append was removed in pandas 2.x;
                # this requires pandas < 2.
                self.monthlymeandf = self.monthlymeandf.append(
                    monthlydict, ignore_index=True)
Ejemplo n.º 12
0
    def calculate_lastyear(self):
        """Walk the dataframe and export readings grouped by month for the
        last year of data (May-December), via txt_getter/excel_setter.

        Rows with a non-string 'Date' (missing values) are skipped; the row
        after such a gap compares against i-2 instead of i-1.
        """
        #Here calculates only from May to December of the last Year of Data
        progress_bar = pb.progressBar(len(self.df),
                                      prefix='Progress:',
                                      suffix='Complete',
                                      length=50)
        if self.console:
            # mirror progress into the GUI console as well as stdout
            console_progress = pb.progressBar(len(self.df),
                                              prefix='Progress:',
                                              suffix='Complete',
                                              length=50,
                                              console=self.console)
        for i in range(len(self.df)):

            if type(self.df['Date'][i]) != type('27/12/1995'):
                # non-string date (missing value): ignore this row
                pass
            elif i >= 1 and type(self.df['Date'][i - 1]) != type('27/12/1995'):
                # previous row was invalid, so the last good date is at i-2
                year = datetime.strftime(
                    datetime.strptime(self.df['Date'][i], '%d/%m/%Y'), '%Y')
                month = datetime.strftime(
                    datetime.strptime(self.df['Date'][i], '%d/%m/%Y'), '%m')
                if year == self.firstyear or year == datetime.strftime(
                        datetime.strptime(self.df['Date'][i - 2], '%d/%m/%Y'),
                        '%Y'):

                    if month == self.firstmonth or month == datetime.strftime(
                            datetime.strptime(self.df['Date'][i - 2],
                                              '%d/%m/%Y'), '%m'):

                        # same day as before: keep accumulating, otherwise
                        # flush the finished day first
                        if self.df['Date'][i] == self.firstdate or self.df[
                                'Date'][i] == self.df['Date'][i - 2]:
                            self.txt_getter(year, month, i)
                        else:
                            self.excel_setter(month)
                            self.txt_getter(year, month, i)
                    else:
                        # month changed: reset marker and flush
                        self.firstmonth = '00'
                        self.excel_setter(month)
                        self.txt_getter(year, month, i)
                else:
                    # year changed: flush before continuing
                    # NOTE(review): excel_setter is called twice here (once
                    # with month, once without) — confirm intended.
                    self.excel_setter(month)
                    self.excel_setter()
                    self.txt_getter(year, month, i)
            else:
                year = datetime.strftime(
                    datetime.strptime(self.df['Date'][i], '%d/%m/%Y'), '%Y')
                month = datetime.strftime(
                    datetime.strptime(self.df['Date'][i], '%d/%m/%Y'), '%m')
                if year == self.firstyear or year == datetime.strftime(
                        datetime.strptime(self.df['Date'][i - 1], '%d/%m/%Y'),
                        '%Y'):

                    if month == self.firstmonth or month == datetime.strftime(
                            datetime.strptime(self.df['Date'][i - 1],
                                              '%d/%m/%Y'), '%m'):
                        # same day as before: keep accumulating, otherwise
                        # flush the finished day first
                        if self.df['Date'][i] == self.firstdate or self.df[
                                'Date'][i] == self.df['Date'][i - 1]:
                            self.txt_getter(year, month, i)
                        else:
                            self.excel_setter(month)
                            self.txt_getter(year, month, i)
                    else:
                        # month changed: reset marker and flush
                        self.firstmonth = '00'
                        self.excel_setter(month)
                        self.txt_getter(year, month, i)
                else:
                    # year changed: flush before continuing
                    self.excel_setter(month)
                    self.txt_getter(year, month, i)
            if i == len(self.df) - 1:
                # last row: flush the final (possibly partial) month
                self.excel_setter(month)

            # print(str(i) + ' de ' + str(len(self.df)))
            progress_bar.print_progress_bar(i)
            if self.console:
                console_progress.print_progress_bar(i)
def main(argv):
    """Forward images through a trained Caffe net and stream the chosen
    layer activations into Sprint FileArchive caches, one archive per layer.

    `argv` is unused directly; all options come from getOptions() (caffe
    path, net/model definition, class-label list, comma-separated layer
    names, output archive paths, verbosity).  One background thread per
    layer overlaps cache writing with the forward passes.  Python 2 code
    (print statements, xrange).
    """
    args = getOptions()

    # Make sure that caffe is on the python path:
    sys.path.append(args.path)

    import caffe

    # read the filenames and create a dictionary, which contains for each segment its including number of images
    if args.verbose:
        print "Reading test filenames..."

    # imagedict and runningSegments are used later to compare the caffe-net-output-file-names with the entries in the imageDict
    imageDict = dict()
    runningSegments = []

    # open class label textfile
    # NOTE(review): handle is never closed; harmless in a one-shot script
    # but a 'with open(...)' block would be cleaner.
    lines = open(args.classlist)

    # for each image in the classlabel file store only its corpus/segment/1/image to the images list
    # and create a dictionary, in which for each segment you have the number of images.
    # This will be used in later functions, to controll the beginning and end of one segment and to check, if every images were really processed
    for line in lines:

        # reduce each image path to "corpus/segment/1" so all images of the
        # same segment collapse onto one key
        line = "/" + line
        tempString = re.sub('/1/', '/', line.strip('\n')).rsplit('/', 3)[1:-1]
        segmentname = '/'.join(tempString) + '/1'

        #else:
        # some formatting stuff to only store the relevant path information
        #        tempString = line.strip('\n').rsplit('/', 4)[1:5]
        #        segmentname = '/'.join(tempString[0:3])

        runningSegments.append(segmentname)

        # create the dictionary segment - number of containing images
        if segmentname not in imageDict:
            imageDict[segmentname] = 0
        imageDict[segmentname] += 1

    # num of total images to process
    numImages = len(runningSegments)

    prevSegmentName = None
    # some auxiliary outputs
    print "the number of segments are: ", len(imageDict.keys())
    print "the total number of processed images are: ", numImages
    print " an example segment looks like this", imageDict.keys()[0]

    # Make sure that caffe is in the python path:
    caffe_root = '../'  # this file is expected to be in {caffe_root}/examples
    sys.path.insert(0, caffe_root + 'python')

    # args.partition selects e.g. caffe.TRAIN / caffe.TEST by name
    net = caffe.Net(args.net, args.model, getattr(caffe, args.partition))

    # GPU mode
    #caffe.set_device(int(gpu))
    caffe.set_mode_gpu()
    #caffe.set_device(int(gpu))
    # batch size is implied by the blob shape of the first requested layer
    batch_size = len(net.blobs[args.layer.split(',')[0]].data)

    if args.verbose:
        print "batch_size: ", batch_size

    iterOverMinibatch = int(math.ceil(
        float(numImages) / batch_size))  # number of images over batch size
    global numLayers
    numLayers = len(args.layer.split(','))
    layers = args.layer.split(',')
    global fas
    fas = []
    outnames = []
    # one output archive per layer: either one explicit name per layer, or
    # a single base name suffixed with the layer index
    for x in xrange(numLayers):
        if len(args.output.split(',')) == numLayers:
            outnames.append(args.output.split(',')[x])
        else:
            outnames.append(args.output.split(',')[0] + '.' + str(x))
        # create the archive file, if there is already one, delete and recreate it
        if os.path.exists(outnames[x]):
            os.remove(outnames[x])
        fas.append(FileArchive(outnames[x], False))

    if args.verbose:
        print "number of npass is :", iterOverMinibatch

    # dictionary, which entries will be written in the archive format, its structure is: segment - [list of posteriors for the images in one segment]
    # IMPORTANT!!!!!    here we assume, that the images are in correct incrementing order!!!
    global forWritingDict
    forWritingDict = []
    threads = []
    for x in xrange(numLayers):
        forWritingDict.append({})
        threads.append(None)
    if not args.verbose:
        progBar = progressbar.progressBar(0, iterOverMinibatch, 77)
        FileCounter = 0
        progBar.draw()
    for i in range(iterOverMinibatch):
        if args.verbose:
            print "processing unit is: " + str(i)
        else:
            FileCounter += 1
            progBar.updateAmount(FileCounter)
            progBar.draw()

        net.forward()  # call once for allocation
        for j in range(len(net.blobs[args.layer.split(',')[0]].data)):
            # global image index of this batch element
            imgNum = i * batch_size + j

            #for layer in args.layer.split(','):  # deal with comma-separated list
            #        feat.append(net.blobs[layer].data[j].tolist())

            # get the segmentname
            # the last minibatch may be padded past the real image count
            if imgNum >= len(runningSegments):
                print "imgNum", imgNum, "len(runningSegments)", len(
                    runningSegments)
                break
            segmentName = runningSegments[imgNum]
            if args.verbose:
                print "segment Name", segmentName, "numImages in Segment", imageDict[
                    segmentName]

            #if segmentname is not found in the forWriting Dictionary make a new entry
            if segmentName not in forWritingDict[0]:
                for x in xrange(numLayers):
                    forWritingDict[x][segmentName] = []
                    #[ net.blobs[layers[x]].data[j] ]
            #else:
            # then add the feature into the featurelist
            for x in xrange(numLayers):
                forWritingDict[x][segmentName].append(
                    net.blobs[layers[x]].data[j].tolist())
                #print ""
                #print forWritingDict[x][segmentName]
                #sys.exit()
            # then check, if the entries are full, if this is full, write it into archive format and pop the entry from dict

            # check if the one segment has been filled completely, if this is the case, write it in the Sprint archive format
            # print imageDict[segmentName], len(forWritingDict[segmentName]), segmentName, runningSegments[imgNum]
            sequenceTotImages = imageDict[segmentName]

            if len(forWritingDict[0][segmentName]) == sequenceTotImages:
                # build a [start, end] time axis with a fixed 40 ms frame shift
                time = np.array([[0.0, 0.04]])
                t = np.array(time)
                for step in range(1, sequenceTotImages):
                    time = time + 0.04
                    t = np.append(t, time, axis=0)

                for x in xrange(numLayers):
                    #f = np.array(forWritingDict[x][segmentName])

                    # if args.verbose:
                    #        print len(f)
                    #        print len(t)
                    # wait for this layer's previous writer thread to finish
                    # before freeing the segment it was writing
                    if threads[x] is not None:
                        threads[x].join()
                        forWritingDict[x].pop(prevSegmentName)
                    threads[x] = threading.Thread(
                        target=writeToCache,
                        args=(forWritingDict[x][segmentName], segmentName, t,
                              x))
                    #threads[x] = multiprocessing.Process(target=writeToCache, args=(forWritingDict[x][segmentName], segmentName, t, x))
                    threads[x].start()
                    # fas[x].addAttributes(segmentName, len(forWritingDict[x][segmentName]), len(t))
                    # fas[x].addFeatureCache(segmentName, np.array(forWritingDict[x][segmentName]), t)

                    # delete the entries from forWritingDict and imageDict only needed for debugging
                if prevSegmentName is not None:
                    imageDict.pop(prevSegmentName)
                # forWritingDict[x].pop(segmentName)
                # imageDict.pop(segmentName)

                #debug!
                #for x in xrange(numLayers):
                #        fas[x].finalize()
                #exit()
                prevSegmentName = segmentName

    # at the end write everythin in the sprint archive format

    # join the last in-flight writer thread per layer, then seal the archives
    for thread in threads:
        if thread is not None:
            thread.join()
    for x in xrange(numLayers):
        fas[x].finalize()
    print ""
    # anything left in imageDict was never completely filled/written
    print "remaining entries:", len(imageDict)
    for seg in imageDict:
        print seg
Ejemplo n.º 14
0
def main():
    """Entry point for the statement processor.

    Choice '1': find, rename and parse all PDF e-statements under the
    current directory, writing every transaction to all-transactions.csv
    and accumulating totals into ``data``.
    Choice '2': rebuild ``data`` from an existing all-transactions.csv.
    Either way, the collected data is then saved to JSON, exported as
    data.js for visualization, and rendered as an HTML table.
    """
    print_options()
    choice = get_input()

    # year range covered by the processed statements / csv rows
    start_year = 0
    end_year = 0

    # recursively process all e statements
    if choice == '1':
        # bank export naming scheme: <masked acct>-<from date>-<to date>.pdf
        statement_pattern = r'\d+X+\d+-20\d{2}-\d{2}-\d{2}-20\d{2}-\d{2}-\d{2}.pdf$'
        statement_paths = Path('.').rglob('*.pdf')
        statements = []

        # rename statements
        for path in statement_paths:
            if search(statement_pattern, str(path.name)):
                rename_file(path)
                statements.append(path)

        if not statements:
            print('could not find any e statements...')
            os.system("pause")
            exit(0)

        # renamed files carry the statement year as the second '-' field
        start_year = int(statements[0].name.split('-')[1])
        end_year = int(statements[-1].name.split('-')[1])

        data = prepare_data_json(start_year, end_year)

        # open statements and write to csv file; 'with' guarantees the file
        # is closed even if a statement parser raises (the original leaked
        # the handle on error)
        csv_file = 'all-transactions.csv'
        csv_header = 'id, account , date, description, category, amount\n'
        with open(csv_file, 'w') as f:
            f.write(csv_header)
            for statement in progressBar(statements,
                                         prefix='Progress:',
                                         suffix='Complete',
                                         length=50):
                # debit and credit statements are told apart by the length
                # of their (renamed) file name
                if len(statement.name) == 38:
                    get_debit_data(statement, f, data)
                if len(statement.name) == 42:
                    get_credit_data(statement, f, data)

        # give transactions an id
        add_transaction_id()
        # round amounts to two decimal places
        for category in data['category']:
            for year in data['category'][category]:
                data['category'][category][year] = round(
                    data['category'][category][year], 2)
        for month in data['month']:
            for year in data['month'][month]:
                data['month'][month][year] = round(
                    data['month'][month][year], 2)
    # process all-transactions.csv
    elif choice == '2':
        if not os.path.isfile('all-transactions.csv'):
            print('did not find all-transactions.csv')
            os.system("pause")
            exit(0)

        with open("all-transactions.csv", 'r') as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')
            rows = list(csv_reader)
            # rows[0] is the header; column 2 holds ISO 'YYYY-MM-DD' dates
            start_year = int(rows[1][2].split('-')[0])
            end_year = int(rows[-1][2].split('-')[0])
            data = prepare_data_json(start_year, end_year)
            for row in progressBar(rows[1:],
                                   prefix='Progress:',
                                   suffix='Complete',
                                   length=50):
                year = row[2].split('-')[0]
                month = get_month_name(row[2].split('-')[1])
                amount = float(row[5])
                # NOTE(review): these assignments overwrite earlier rows for
                # the same category/month and year; if totals are intended
                # (as the rounding in choice '1' suggests) this should
                # accumulate with '+=' -- confirm before changing.
                data['category'][row[4]][int(year)] = amount
                data['month'][month][int(year)] = amount
    else:
        # neither branch ran, so no 'data' was built; previously execution
        # fell through and crashed with a NameError in save_data_json()
        print('invalid choice...')
        os.system("pause")
        exit(0)

    # save data to json file
    save_data_json(data)
    # prepare data.js variables for visualization
    make_data_js(start_year, end_year)
    # convert all-transactions.csv to html table
    make_table()
Ejemplo n.º 15
0
    def errorStudy(self,
                   fRange=[120E9, 150E9, 50E6],
                   stack=None,
                   quiet=False,
                   incidentAngle=0,
                   extractPOIs=False,
                   pois=[128.00, 130.00, 140.0, 133.0, 137.3, 142.8, 135.00],
                   numCalc=10,
                   parallelize=False,
                   numCores=2,
                   output="",
                   chunksize=1):
        """Monte-Carlo tolerance study of the filter characteristic.

        Runs calculateFrequencyResponse() ``numCalc`` times with normally
        distributed random layer-thickness errors and averages transmission
        (T), reflection (R) and absorption (A).  The per-frequency standard
        deviations quantify how sensitive the filter design is to
        manufacturing tolerances.

        Parameters
        ----------
        fRange : [fStart, fStop, fStep] in Hz.
        stack : optional replacement stack; None keeps self.stack
            (None sentinel replaces the original mutable default ``[]``,
            which was aliased onto self.stack).
        quiet : suppress console output and progress bars.
        incidentAngle : angle of incidence forwarded to the solver.
        extractPOIs : also evaluate the points-of-interest frequencies.
        pois : POI frequencies forwarded to self.extractPOI().
        numCalc : number of random realizations.
        parallelize, numCores, chunksize : multiprocessing controls.
        output : if non-empty, np.save() [result, poiResult] to this path.

        Returns
        -------
        [result, poiResult] -- result holds 'freqs', the means 'T'/'R'/'A'
        and deviations 'devT'/'devR'/'devA'; poiResult is {} unless
        extractPOIs is set.
        """
        if stack:
            self.stack = stack

        if fRange[0] == fRange[1]:
            # degenerate sweep: a single frequency point
            stepWidth = 1
            steps = len(np.linspace(fRange[0], fRange[0], stepWidth))
        else:
            stepWidth = int((fRange[1] - fRange[0]) / fRange[2])
            steps = len(np.linspace(fRange[0], fRange[1], stepWidth))

        # one row per random realization, one column per frequency step
        raw = dict({
            'T': np.zeros((numCalc, steps)),
            'R': np.zeros((numCalc, steps)),
            'A': np.zeros((numCalc, steps))
        })

        avail_cores = multiprocessing.cpu_count()

        if not quiet:
            print(str(avail_cores) + " cores available.")

        if parallelize:
            # Clamp the requested core count to what the machine offers.
            # Previously this clamp only ran when not quiet, and quiet
            # parallel runs silently fell back to a single core; a stray
            # Parallel(...) call on an undefined 'inputs' was also removed.
            if numCores > avail_cores:
                if not quiet:
                    print("Warning: demanded " + str(numCores) +
                          " cores but only " + str(avail_cores) +
                          " cores available.")
                numCores = avail_cores
        else:
            numCores = 1

        if not quiet:
            print("Using " + str(numCores) + " core(s).")

        if not quiet:
            print("Performing random error sweep...")
            pb = progressBar(total=numCalc)
            pb.show()

        if parallelize:
            # args mirror calculateFrequencyResponse(fStart, fStop, fStep,
            # useError, buildStack, quiet, incidentAngle)
            job_args = [(fRange[0], fRange[1], fRange[2], True, True, True,
                         incidentAngle) for _ in range(numCalc)]
            with multiprocessing.Pool(processes=numCores) as pool:
                poolResult = pool.map_async(
                    self.calculateFrequencyResponse_helper, job_args,
                    chunksize)
                # poll until all chunks are done, updating the progress bar
                # from the pool's (private) bookkeeping
                while not poolResult.ready():
                    if not quiet:
                        tasksLeft = pool._cache[list(
                            pool._cache.keys())[0]]._number_left
                        chunkSize = pool._cache[list(
                            pool._cache.keys())[0]]._chunksize
                        pb.update(numCalc - chunkSize * tasksLeft)
                    time.sleep(1)
                pool.close()
                pool.join()
            if not quiet:
                pb.finish(timeElapsed=True)
            # AsyncResult.get() returns the result list (and re-raises any
            # worker exception) -- replaces poking at __dict__['_value']
            poolResult = poolResult.get()

            for itervar in range(numCalc):
                raw['T'][itervar, :] = poolResult[itervar]['T']
                raw['R'][itervar, :] = poolResult[itervar]['R']  # was 'T' (copy-paste bug)
                raw['A'][itervar, :] = poolResult[itervar]['A']  # was 'T' (copy-paste bug)
            result = poolResult[0]

        else:
            for itervar in range(numCalc):
                result = self.calculateFrequencyResponse(
                    fRange[0],
                    fRange[1],
                    fRange[2],
                    useError=True,
                    buildStack=True,
                    quiet=True,
                    incidentAngle=incidentAngle)
                raw['T'][itervar, :] = result['T']
                raw['R'][itervar, :] = result['R']
                raw['A'][itervar, :] = result['A']
                if not quiet:
                    pb.update(itervar)
            if not quiet:
                pb.finish(timeElapsed=True)

        freq = result['freqs']
        result = dict({
            'freqs': freq,
            'devT': np.zeros(steps), 'devA': np.zeros(steps),
            'devR': np.zeros(steps),
            'T': np.zeros(steps), 'A': np.zeros(steps), 'R': np.zeros(steps)
        })

        if not quiet:
            print("Calculating mean and deviation...")
            pb = progressBar(total=steps)
            pb.show()
        for stepvar in range(steps):
            result['T'][stepvar] = np.mean(raw['T'][:, stepvar])
            result['devT'][stepvar] = np.std(raw['T'][:, stepvar])
            result['R'][stepvar] = np.mean(raw['R'][:, stepvar])
            result['devR'][stepvar] = np.std(raw['R'][:, stepvar])
            result['A'][stepvar] = np.mean(raw['A'][:, stepvar])
            result['devA'][stepvar] = np.std(raw['A'][:, stepvar])
            if not quiet:
                pb.update(stepvar)

        if not quiet:
            pb.finish()

        if extractPOIs:
            poiResult = self.extractPOI(pois=pois, result=result, quiet=quiet)
        else:
            poiResult = dict({})

        if len(output) > 0:
            np.save(output, [result, poiResult])

        return [result, poiResult]
Ejemplo n.º 16
0
 def compareStacks(self,
                   stacks=None,
                   fRange=[125E9, 145E9, 100E6],
                   errorAnalysis=True,
                   numCalc=100,
                   poi=[142.8, 137.3, 140.0],
                   parallelize=True,
                   numCores=2,
                   plot=True):
     """Evaluate several layer stacks at the given points of interest and
     (optionally) plot their transmission for comparison.

     Parameters
     ----------
     stacks : list of dicts with keys 'stackElement', 'quantity', 'label';
         None sentinel replaces the original mutable default ``[]``.
     fRange : [fStart, fStop, fStep] in Hz for the frequency sweep.
     errorAnalysis : if True run a full errorStudy() per stack (results
         carry error bars); otherwise a single frequency response per stack.
     numCalc, parallelize, numCores : forwarded to errorStudy().
     poi : POI frequencies handed to extractPOI().
     plot : if False, skip all matplotlib output.  (Previously this flag
         was accepted but ignored and the plot was always shown.)

     Returns
     -------
     list of POI result dicts, one per stack.
     """
     if stacks is None:
         stacks = []
     stackResults = []
     print("Comparing stacks...")
     if not errorAnalysis:
         # errorStudy() draws its own progress bar, so only the quick path
         # needs one here
         pb = progressBar(total=len(stacks))
         pb.show()
         itervar = 0
     for stack in stacks:
         self.clearLayers()
         self.clearWFR()
         self.defineStack(elem=stack['stackElement'], qty=stack['quantity'])
         if errorAnalysis:
             self.buildStack(useError=True)
             [studyResult,
              poiResult] = self.errorStudy(fRange=fRange,
                                           pois=poi,
                                           numCalc=numCalc,
                                           extractPOIs=True,
                                           parallelize=parallelize,
                                           numCores=numCores,
                                           quiet=False)
         else:
             result = self.calculateFrequencyResponse(fRange[0],
                                                      fRange[1],
                                                      fRange[2],
                                                      useError=True,
                                                      buildStack=True,
                                                      quiet=True)
             poiResult = self.extractPOI(pois=poi,
                                         result=result,
                                         quiet=True)
             itervar += 1
             pb.update(itervar)
         stackResults.append(poiResult)
     if not errorAnalysis:
         pb.finish()
     if plot:
         fig = plt.figure()
         ax = fig.add_subplot(111)
         ax.set_prop_cycle(cycler('color', ['r', 'g', 'b', 'y', 'm', 'c']))
         matplotlib.rcParams['errorbar.capsize'] = 3
         for stackIndex in range(0, len(stackResults)):
             if errorAnalysis:
                 # error bars are the standard deviations from errorStudy()
                 ax.errorbar(stackResults[stackIndex]['poiFreqs'],
                             stackResults[stackIndex]['poiT'],
                             yerr=stackResults[stackIndex]['poiDevT'],
                             fmt="x",
                             label=stacks[stackIndex]['label'])
             else:
                 ax.scatter(stackResults[stackIndex]['poiFreqs'],
                            stackResults[stackIndex]['poiT'],
                            label=stacks[stackIndex]['label'])
         ax.set_xlabel("Frequency [GHz]")
         ax.set_ylabel("Transmission [dB]")
         plt.legend()
         plt.grid()
         plt.show()
     return stackResults
def generateScrambledLinearAlignment(args):
    """Build a scrambled linear alignment file for one stream.

    Reads a corpus XML, one or more XML alignments and one lexicon per
    stream, then for stream args.scrambleStream writes one line per image
    to args.output, mapping each image path to a comma-separated mixture
    label vector (zero-padded for the other streams).  Python 2 code
    (print statements, unicode, xrange).
    """
    # need to encode utf8
    reload(sys)
    sys.setdefaultencoding('utf-8')

    # read the corpus
    corpusfile = args.corpus
    parser = make_parser()
    corpus = CorpusHandler(encoding="UTF-8", includedir=corpusfile)
    parser.setContentHandler(corpus)
    parser.parse(corpusfile)

    corpus_name = corpus.name

    # merge all alignment XMLs into one dict keyed by recording/segment
    xmlaligns = {}
    for xmlpath in args.xmlalignments.split(','):
        xmlaligns.update(copy.deepcopy(readAlignXML(xmlpath).content))

    orths2class = []
    orths2phones = []  # use dictionary for caching
    labels_of_si = []
    siPhones = []

    # now get the class labels using lexiconahndler: input every entry from actualOrth and state number
    # one parser/lexicon per comma-separated stream; also pre-resolve the
    # silence phoneme and its mixture label per stream
    lexParsers = []
    lexicons = []
    assert (args.lexiconfile)
    for index, lex in enumerate(args.lexiconfile.split(',')):
        lexParsers.append(make_parser())
        lexicons.append(LexiconHandler(int(args.states.split(',')[index])))
        lexParsers[index].setContentHandler(lexicons[index])
        lexParsers[index].parse(lex)
        orths2phones.append({})
        orths2class.append({})
        siPhones.append(lexicons[index].getLemmaByOrth("[SILENCE]").phon[0])
        labels_of_si.append(
            str(lexicons[index].getMixturesByPhon(siPhones[index])[0]))

    # streams before/after the scrambled one get a constant "0" label
    numStreams = len(lexicons)
    allpronsCache = []
    prefix = []
    postfix = []
    for i in range(0, numStreams):
        allpronsCache.append({})
        if i < args.scrambleStream:
            prefix.append("0")
        if i > args.scrambleStream:
            postfix.append("0")

    # NOTE(review): outFile is never explicitly closed; relies on process
    # exit to flush -- a 'with open(...)' block would be safer.
    outFile = open(args.output, "wb")
    if args.verbose:
        print "number of files to processed: ", len(xmlaligns)
    else:
        progBar = progressbar.progressBar(0, len(xmlaligns), 77)
        FileCounter = 0
        progBar.draw()

    for rec in xmlaligns:
        lexMixLists = []
        if args.verbose:
            print "current processed segment is " + rec
        else:
            FileCounter += 1
            progBar.updateAmount(FileCounter)
            progBar.draw()

        index = args.scrambleStream
        for align in xmlaligns[rec]:
            # orth = first token of the first '/'-separated field
            orth = align[0].split("/")[0].split(" ")[0]
            if orth not in allpronsCache[index]:
                # cache the de-duplicated phoneme set of all pronunciations,
                # dropping 'si' (silence) when other phonemes exist
                lemma = lexicons[index].getLemmaByOrth(orth)
                allpronsCache[index][orth] = []
                for pronunciation in lemma.phon:
                    allpronsCache[index][orth].extend(pronunciation)
                allpronsCache[index][orth] = list(
                    set(allpronsCache[index][orth]))
                if len(allpronsCache[index][orth]) > 1:
                    allpronsCache[index][orth].pop(
                        allpronsCache[index][orth].index(u'si'))

            #print "start", align[1], "end", align[2], "orth", orth
            # walk the frame range [align[1], align[2]] cycling through the
            # cached phonemes; each frame contributes one padded label row
            pron_cnt = 0
            state = 0
            for frame in range(align[1], align[2] + 1):
                if pron_cnt >= len(allpronsCache[index][orth]):
                    pron_cnt = 0
                    state = 0
                #print pron_cnt, prefix, postfix
                #print len(allpronsCache[index][orth])
                phon = allpronsCache[index][orth][pron_cnt]
                mixtures = lexicons[index].getMixturesByPhon(phon)[state]
                lexMixLists.append(copy.deepcopy(prefix))
                lexMixLists[-1].append(str(mixtures))
                lexMixLists[-1].extend(postfix)
                #lexMixLists[-1].extend([orth])

                #print frame, "added", mixtures
                pron_cnt += 1

            # debug leftover: 'encodedlist' is computed but never used
            encodedlist = ', '.join(map(unicode, allpronsCache[index][orth]))
            #print allpronsCache[index][orth]
            #print(u'[{}]'.format(encodedlist).encode('UTF-8'))
            #print rec

        # locate the recording's image directory; some corpora have an
        # extra '/1' path component (args.useOne)
        if args.useOne:
            newPath = args.images + corpus_name + "/" + rec + "/1"
        else:
            newPath = args.images + corpus_name + "/" + rec
        #print newPath
        os.chdir(newPath)
        imageList = glob.glob(args.imageExtension)
        # sort the file List according to its names
        imageList.sort()
        imageListLen = len(imageList)
        #print imageListLen

        #print "xxxxxxxxxxx"
        #sys.exit(0)

        if args.outputC3Dstyle:
            # changed this output style, so we have the same number of outputs as inputs
            # C3D: clip start frame saturates at the sequence borders
            c3dStartFrame = 1
            for imageIndex in xrange(imageListLen):
                if imageIndex >= args.C3DclipLength / 2 and imageIndex < len(
                        lexMixLists) - args.C3DclipLength / 2:
                    c3dStartFrame += 1
                if imageIndex < imageListLen and imageIndex < len(lexMixLists):
                    outFile.write(newPath + '/' + str(imageList[imageIndex]) +
                                  ' ' + str(c3dStartFrame) + ' ' +
                                  ','.join(lexMixLists[imageIndex]) + '\n')
        else:
            # plain style: one "<path> <labels>" line per image, truncated
            # to the shorter of image list and alignment
            if imageListLen <= len(lexMixLists):
                outFile.write(
                    ''.join(newPath + '/' + str(imageList[imageIndex]) + " " +
                            ','.join(lexMixLists[imageIndex]) + '\n'
                            for imageIndex in xrange(imageListLen)))
            else:
                outFile.write(
                    ''.join(newPath + '/' + str(imageList[imageIndex]) + " " +
                            ','.join(lexMixLists[imageIndex]) + '\n'
                            for imageIndex in xrange(len(lexMixLists))))
def generateClassLabel(args):
    """Generate per-image class labels from Sprint alignment caches.

    For every segment in the alignment caches, looks up each frame's
    allophone/state in the per-stream lexicons to obtain mixture indices,
    matches them against the image files on disk, and writes
    "<image path> [<startframe>] <label,label,...>" lines to args.output.
    Python 2 code (print statements, xrange).
    """

    # need to encode utf8
    reload(sys)
    sys.setdefaultencoding('utf-8')

    # Read the alignement Cache
    alignmentCaches = args.alignmentcache.split(',')
    acs = []
    for ac in alignmentCaches:
        if args.verbose:
            print ac
        acs.append(open_file_archive(ac))

    # Read the allophone file: allophone information will be matched
    # to the align cache file

    if args.allophonefile:
        for index, al in enumerate(args.allophonefile.split(',')):
            acs[index].setAllophones(al)

    # for each (image-allophone-state)  get its corresponding index from the lexicon
    # this will then be used as class labels for caffe
    lexicons = []
    parsers = []
    states = []
    states = map(int, args.states.split(','))
    for index, lex in enumerate(args.lexiconfile.split(',')):
        parsers.append(make_parser())
        lexicons.append(LexiconHandler(states[index]))
        parsers[-1].setContentHandler(lexicons[-1])
        parsers[-1].parse(lex)

    # to match every image with the mix from lexicon file:
    # read the path of the data from alignment cache and store it in the list "dataList"
    # but filter out the paths, which ends with a .attribs
    dataPathList = []

    #    print acs[0].file_list()
    #    exit()
    for key in acs[0].file_list(
    ):  #acs[0].ft:  # keys should be same in all caches,
        #therefore just going for the first alignment cache
        if (".attribs" not in key):
            dataPathList.append(key)

    # for each image get only its state values and allophone of each data and store it in "mixList"

    if args.verbose:
        print "number of files to processed: ", len(dataPathList)
    else:
        progBar = progressbar.progressBar(0, len(dataPathList), 77)
        FileCounter = 0
        progBar.draw()

    outFile = open(args.output, "wb")

    for i in xrange(len(dataPathList)):
        if args.verbose:
            print "current processed segment is " + str(
                i) + " th data", dataPathList[i]
        else:
            FileCounter += 1
            progBar.updateAmount(FileCounter)
            progBar.draw()
        #print "the name of the processed data is: " +
        #dataPathList[i]+ "\n"
        mixLists = []
        lexMixLists = []
        # per cache/stream: list of (allophone base name, state) per frame
        for index in xrange(len(acs)):
            acread = acs[index].read(dataPathList[i], "align")
            if args.verbose:
                for align in acread:
                    #print align
                    #print index
                    print acs[index].showAllophone(align[1]), " --> ", align
                    #print acs[index].allophones[align[1]], " --> ", align
            mixLists.append([(acs[index].showAllophone(align[1]).split('{')[0],
                              align[2]) for align in acread])

        # store the mixtureIndex from the lexicon using allophone name and state and store it in lexMixList
        for imageListIndex in xrange(len(mixLists[0])):
            lexMixLists.append([
                str(lexicons[innerindex].getMixturesByPhon(
                    mixLists[innerindex][imageListIndex][0])[
                        mixLists[innerindex][imageListIndex][1]])
                for innerindex in xrange(len(acs))
            ])
        newPath = "".join([args.images, dataPathList[i]])

        # rename Path(when 1 should not be used erase it from the path), check if args.useOne is set: this is for the case, when in some cases the file-directory  structure contains a 1,
        #  for further questions, ask Oscar Koller, Sepehr Zargaran, Ji-seung Shin
        if not args.useOne:
            newPath = newPath[:-2]
        if args.outputC3Dstyle:
            # C3D style only needs the image count, not the names
            os.chdir(newPath)
            imageListLen = len(glob.glob(args.imageExtension))
            imageList = [''] * imageListLen
        else:
            os.chdir(newPath)
            imageList = glob.glob(args.imageExtension)
            # sort the file List according to its names
            imageList.sort()
            imageListLen = len(imageList)
        # write out
        # sanity check: image count should match alignment length
        if len(imageList) != len(lexMixLists):
            #for im in xrange(len(imageList)):
            #    print im, imageList[im]
            #print ""
            #for im in xrange(len(lexMixLists)):
            #    print im, lexMixLists[im]
            if not args.ignoreDifferentLengths:
                print 'error:', dataPathList[i], 'number of images ' + str(
                    len(imageList)) + ' != ' + str(
                        len(lexMixLists)) + ' alignment count '
                exit()
            else:
                print 'warning:', dataPathList[i], 'number of images ' + str(
                    len(imageList)) + ' != ' + str(
                        len(lexMixLists)) + ' alignment count '

        if args.outputC3Dstyle:
            # changed this output style, so we have the same number of outputs as inputs
            # clip start frame saturates at the sequence borders
            c3dStartFrame = 1
            for imageIndex in xrange(imageListLen):
                if imageIndex >= args.C3DclipLength / 2 and imageIndex < len(
                        lexMixLists) - args.C3DclipLength / 2:
                    c3dStartFrame += 1
                if imageIndex < imageListLen and imageIndex < len(lexMixLists):
                    outFile.write(newPath + '/' + str(imageList[imageIndex]) +
                                  ' ' + str(c3dStartFrame) + ' ' +
                                  ','.join(lexMixLists[imageIndex]) + '\n')
        else:
            # plain style: truncate to the shorter of images and alignment
            if imageListLen <= len(lexMixLists):
                outFile.write(
                    ''.join(newPath + '/' + str(imageList[imageIndex]) + " " +
                            ','.join(lexMixLists[imageIndex]) + '\n'
                            for imageIndex in xrange(imageListLen)))
            else:
                outFile.write(
                    ''.join(newPath + '/' + str(imageList[imageIndex]) + " " +
                            ','.join(lexMixLists[imageIndex]) + '\n'
                            for imageIndex in xrange(len(lexMixLists))))
    outFile.close()
Ejemplo n.º 19
0
# Python 2 console script fragment: prune first_list according to er_normal,
# then set up the console and a progress bar before the formatting loop.
# er_normal, first_list, start_screen, folder_name, progressBar come from
# earlier in the file (outside this view).
number_zero = 0
for z in er_normal:
    if z == 0:
        # NOTE(review): removes the element currently at index 0 and does
        # not apply the number_zero offset -- presumably intentional for
        # z == 0, but verify against how er_normal is built.
        first_list.remove(first_list[z])
        number_zero += 1
    else:
        # shift the index left by the number of items already removed
        qwe = first_list.pop(z-number_zero)
        number_zero += 1
system("color 06")
system("cls")
print start_screen
print
print "-=- Formatting... Press [Ctrl + C] to Stop -=-".rjust(63)
print "\n"
chdir(folder_name)
# progressBar apparently divides by the item count, so an empty first_list
# raises ZeroDivisionError -- treated here as a file-format error
try: prog = progressBar(0, len(first_list), 46)
except ZeroDivisionError:
    try:
        raw_input("\t\t\t      File Format Error...")
        exit()
    except KeyboardInterrupt: exit()
for j in range(len(first_list)):
    try:
        if isdir(first_list[j][3]) == False: mkdir(first_list[j][3])
        chdir(first_list[j][3])
        file_open = open(first_list[j][2]+".txt", "a")
        file_open.write(first_list[j][0]+"\t"+first_list[j][1]+"\n")
        file_open.close()
        chdir("..")
        if j+1 != len(first_list): prog.updateAmount(j)
        else: prog.updateAmount(len(first_list))