def sube(nombrearchivo):
    """Extract a document's text and write a keyword-frequency report.

    The document is opened with ``fitz``.  The lower-cased text of every page
    is written to ``<nombrearchivo>.txt``, each page followed by a ``-----``
    line.  That text file is then read back, tokenized, stripped of
    punctuation tokens and of English/Spanish stop words, and the remaining
    tokens are written with their counts (most frequent first) to
    ``<nombrearchivo>KEYWORDS.txt``.

    :param nombrearchivo: path of the document to process.
    :return: None
    """
    ruta_texto = nombrearchivo + ".txt"

    doc = fitz.open(nombrearchivo)
    try:
        with open(ruta_texto, "wb") as salida:
            for pagina in doc:
                # Lower-case BEFORE encoding: bytes.lower() only touches ASCII,
                # so accented capitals would otherwise never match the
                # lower-case stop-word list.
                # NOTE(review): newer PyMuPDF releases name this get_text().
                texto = pagina.getText().lower().encode("utf8")
                salida.write(texto)
                salida.write(b"\n-----\n")
    finally:
        # Release the document handle even if a page fails to extract.
        doc.close()

    with open(ruta_texto, 'r', encoding='UTF8') as archivo:
        texto = archivo.read()

    stop_words = set(stopwords.words(fileids=('english', 'spanish')))

    # Keep tokens that are neither punctuation nor stop words.
    filtro = [
        token for token in word_tokenize(texto)
        if token not in string.punctuation and token not in stop_words
    ]

    conteo = Counter(filtro)

    with open(nombrearchivo + 'KEYWORDS.txt', 'w', encoding='UTF8') as far:
        for palabra, cantidad in conteo.most_common():
            far.write(f"{palabra} {cantidad}\n")
예제 #2
0
def create_vocabulary():
    """
    Build a stem vocabulary from the training tweets and dump it as JSON.

    Reads the CSV at cfg['csv_relative_path'] (first row skipped), stems every
    space-separated, lower-cased token found in the text column of the first
    RNN_CONFIG['vocab_using_n_tweets'] rows, and keeps the
    RNN_CONFIG['AE_vocab_size'] most frequent stems.
    The vocabulary is an ordered dictionary: the keys are the word stems and
    the values are each stem's index (0 for the most frequent stem).
    :return: None, dumps the vocabulary as a .json file at data/vocab.json
    """
    with open(cfg['csv_relative_path'], newline='') as csvfile:
        rows = list(csv.reader(csvfile))[1:]

    counts = {}
    stemmer = PorterStemmer()

    for row_idx, row in enumerate(rows[:RNN_CONFIG['vocab_using_n_tweets']]):
        printProgressBar(row_idx, RNN_CONFIG['vocab_using_n_tweets'],
                         'creating dictionary')
        for token in row[COLUMN_NAME_TO_IDX['text']].lower().split(' '):
            stem = stemmer.stem(token)
            counts[stem] = counts.get(stem, 0) + 1

    # rank stems by descending number of occurrences and keep the top ones
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    vocab = OrderedDict()
    for rank, (stem, _) in enumerate(ranked[:RNN_CONFIG['AE_vocab_size']]):
        vocab[stem] = rank

    with open('data/vocab.json', 'w') as f:
        json.dump(vocab, f, indent=4)
예제 #3
0
파일: advanced.py 프로젝트: alexloser/bace3
def parse_argv(args: list, namemap: dict = None) -> dict:
    """Group command-line values under the option that precedes them.

    ``args[0]`` (the program name) is skipped.  Every token starting with
    ``-`` opens an option; the tokens that follow, up to the next option,
    are its values.  Tokens that appear before the first option are ignored.

    :param args: argument vector, typically ``sys.argv``.
    :param namemap: optional mapping from an option as typed (e.g. ``'-v'``)
        to the key to store it under.
    :return: an OrderedDict in first-seen option order.  An option maps to
        ``None`` when it has no values, to the value itself when it has
        exactly one, and to a list of values otherwise.
    """
    d = OrderedDict()
    if not args:
        return d

    current = None  # key of the option currently collecting values
    for token in args[1:]:
        # startswith() instead of token[0] so an empty-string argument is
        # treated as a value rather than raising IndexError.
        if token.startswith('-'):
            current = namemap[token] if namemap and token in namemap else token
            d.setdefault(current, [])
        elif current is not None:
            d[current].append(token)

    # Collapse the value lists: none -> None, single -> scalar.
    for k, v in tuple(d.items()):
        if not v:
            d[k] = None
        elif len(v) == 1:
            d[k] = v[0]
    return d
예제 #4
0
def importSeqFiles(request, pk):
    """Parse an uploaded comma-separated file into a file -> [experiment, run] table.

    The upload under ``request.FILES['excel_file']`` is decoded as UTF-8 and
    split into lines.  For each row the third column is used as the key and
    the most recent non-empty first and second columns as its
    ``[expName, runName]`` value, so empty cells inherit the value from the
    row above.  Entries keyed ``"File Path"`` or ``" "`` are dropped.
    NOTE(review): ``"File Path"`` looks like the header cell of the third
    column - confirm against the file format.

    :param request: request object carrying the upload in ``FILES``.
    :param pk: primary key of the ``Project`` to put in the context.
    :return: the tuple ``(request, template_name, context)`` where
        ``context['runDict']`` is the list of ``(key, [expName, runName])``
        pairs sorted by key and ``context['project']`` is the project.
    """
    template_name = 'importFiles.html'
    excel_file = request.FILES['excel_file']
    excel_file_content = excel_file.read().decode("utf-8")
    runDict = OrderedDict()

    # Start empty so a first row with blank cells cannot hit an unbound name.
    expName = ""
    runName = ""
    for line in excel_file_content.split("\n"):
        v = line.rstrip("\r").split(",")
        if len(v) < 3:
            # Blank or truncated row (e.g. the empty string that follows a
            # trailing newline) - nothing to record.
            continue
        if v[1] != "":
            runName = v[1]
        if v[0] != "":
            expName = v[0]
        runDict[v[2]] = [expName, runName]

    runDict.pop("File Path", None)
    runDict.pop(" ", None)

    context = {}
    context['runDict'] = sorted(runDict.items())
    context['project'] = Project.objects.get(pk=pk)
    return (request, template_name, context)
예제 #5
0
def test_largescale():
    """Time input generation and analysis for a series of doubling input sizes.

    Starting at 10000 and doubling each round, the function generates a test
    input and analyses it several times per size, prints each timing, and
    records the average analysis time per size.  Finally it prints the
    collected averages, first as the raw mapping and then as a small table.
    """
    s = Stopwatch()
    integration_factor = 5
    device_map = device_parser.build_device_map(device_parser.parse_data('test.xml'))
    test_size = 10000
    rounds = 5   # number of input sizes tried; the size doubles every round
    repeats = 5  # timed runs averaged for each size
    histogram = OrderedDict()

    for _ in range(rounds):
        total = 0.0  # accumulated analysis time for this size

        for _ in range(repeats):
            s.start()
            generate_test_input(device_map, test_size, file_name='test_input1.csv')
            s.stop()
            print('Generating test input of size {}: '.format(test_size), s.read())

            # Only the analysis step counts towards the average.
            s.reset()
            s.start()
            analyze_data_nograph('csvs/test_input1.csv', integration_factor, device_map)
            s.stop()
            print('Processing input of size {}:     '.format(test_size), s.read())

            total += s.read()
            s.reset()

        average = total / repeats
        print('Average time for input of size {}:  '.format(test_size), average)
        histogram[test_size] = average

        test_size *= 2

    print(histogram)

    # Header once, then one row per input size.
    print(' size | time ')
    for size, seconds in histogram.items():
        print('{0:5d}|{1:5f}'.format(size, seconds))
예제 #6
0
def createFasta(input_file,append_file,order):
    """Assemble per-chromosome sequences from FASTA fragments and append them to a file.

    Every record of ``input_file`` is loaded into memory keyed by its id.
    Each non-blank line of ``order`` is tab-separated: the first field has the
    form ``<id>__<start>__<end>`` (1-based start, inclusive end) and the
    second field is the chromosome name.  The selected slice of record
    ``<id>`` is appended to that chromosome, in file order.  Finally one
    FASTA record per chromosome (in first-seen order) is appended to
    ``append_file``.

    :param input_file: path of the FASTA file holding the source sequences.
    :param append_file: path of the FASTA file to append the result to.
    :param order: path of the tab-separated ordering file.
    :raises KeyError: if an ordering line names an id absent from ``input_file``.
    """
    fastaInDict=dict()
    seq=OrderedDict()

    with open(input_file) as FASTAIN, open(append_file, "a") as APP, open(order) as ORD:
        for fastaSeq in SeqIO.parse(FASTAIN,"fasta"):
            fastaInDict[fastaSeq.id]=str(fastaSeq.seq)
        for line in ORD:
            # Strip "\r" as well so CRLF files do not leak it into the
            # chromosome name; skip blank lines instead of failing on them.
            line=line.rstrip("\r\n")
            if not line.strip():
                continue
            v=line.split("\t")
            val=v[0].split("__")
            k=val[0]
            start=int(val[1])-1  # 1-based start -> 0-based slice index
            end=int(val[2])
            chrom=v[1]
            seq.setdefault(chrom,[]).append(fastaInDict[k][start:end])
        for k,v in seq.items():
            print("Writing Chromosome"+str(k))
            APP.write(">"+k+"\n")
            APP.write("".join(v))
            APP.write("\n")
예제 #7
0
 def firstUniqChar(self, s: str) -> int:
     """Return the index of the first character that occurs exactly once in *s*.

     :param s: the string to scan.
     :return: the 0-based index of the first non-repeated character, or -1
         when every character repeats (or *s* is empty).
     """
     # Public collections module instead of the private _collections one.
     from collections import Counter
     counts = Counter(s)
     # Scanning s itself yields the index directly, no second s.index() pass.
     for idx, char in enumerate(s):
         if counts[char] == 1:
             return idx
     return -1
예제 #8
0
class BasicNet:
    """
    Sequential container: feeds an input through its layers in the order given
    and remembers every intermediate output.
    """
    def __init__(self, *args, **kwargs):
        """
        Register the layers, keyed by each layer's ``name`` attribute.

        :param args: layer objects, applied in the order they are passed.
        :param kwargs: optional ``name`` for the net; when absent a random
            integer between 0 and 32, as a string, is used.
        """
        self.layers = OrderedDict((layer.name, layer) for layer in args)
        self.results = OrderedDict()
        self.x = None
        self.y = None

        # The fallback is always drawn, even when a name is supplied.
        fallback_name = str(random.randint(0, 2**5))
        self.name = kwargs.get('name', fallback_name)

    def forward(self, x, *args, **kwargs):
        """
        Run ``x`` through every layer's ``forward`` in turn.

        The input is stored under ``results['input']`` and each layer's output
        under ``results[<layer name>]``, always as ``{'x': value}``.  Extra
        positional and keyword arguments are passed to every layer.

        :return: the output of the last layer (``x`` itself if there are none).
        """
        self.x = x
        self.results['input'] = {'x': x}

        current = x
        for key in self.layers:
            current = self.layers[key].forward(current, *args, **kwargs)
            self.results[key] = {'x': current}

        self.y = current
        return self.y

    def __str__(self):
        """Return the net's name followed by one tab-indented line per layer."""
        rows = ['\n\t{}'.format(layer.__str__())
                for layer in self.layers.values()]
        return f'Net {self.name}:' + ''.join(rows)
예제 #9
0
 def unpack(self, action: OrderedDict) -> OrderedDict:
     """
     Expand a packed action into its original entries.

     Each key of ``action`` found in ``self._subdicts`` is replaced by the
     entries that the matching sub-dictionary's ``getById(value)`` returns;
     every other key/value pair is copied unchanged.

     :param action: mapping from (possibly packed) ids to values.
     :return: an OrderedDict holding the unpacked entries.
     """
     unpacked = {}
     for label, packed_value in action.items():
         if label not in self._subdicts:
             unpacked[label] = packed_value
             continue
         expanded = self._subdicts[label].getById(packed_value)
         unpacked.update(expanded.items())
     return OrderedDict(unpacked)
예제 #10
0
    def stringPermutation(orgString):
        """
        Print the list filled in by Util.string_permutation_helper for orgString.

        Counts how often each character occurs, orders those counts by
        character, and passes them to the helper together with the string
        length, an empty working list and an empty result list; the result
        list is printed afterwards.
        """
        frequency = {}
        for ch in orgString:
            frequency[ch] = frequency.get(ch, 0) + 1

        ordered_counts = OrderedDict(
            sorted(frequency.items(), key=lambda pair: pair[0]))

        working = []
        permutations = []
        Util.string_permutation_helper(len(orgString), ordered_counts,
                                       working, permutations)
        print(permutations)
예제 #11
0
 def unpack(self, action:OrderedDict) -> OrderedDict:
     '''
     Utility function that turns a packed action back into an unpacked one.

     @action a packed action: an OrderedDict keyed by entity labels, some
     of which may be packed-entity labels known to self._subdicts.
     @return the unpacked action: packed labels are replaced by the entries
     returned by the matching sub-dictionary's getById(value); all other
     entries are kept as they are.
     '''
     result = {}
     for key, val in action.items():
         subdict = self._subdicts[key] if key in self._subdicts else None
         if subdict is None:
             result[key] = val
         else:
             for orig_key, orig_val in subdict.getById(val).items():
                 result[orig_key] = orig_val
     return OrderedDict(result)
예제 #12
0
    def make_theano_tick(self):
        """Build the theano function that advances the network by one step.

        Collects the update rules returned by ``node.update(self.dt)`` for
        every node that defines ``update`` and compiles them into a theano
        function with no inputs and no outputs.

        :returns: theano function
        """
        # variable -> theano expression describing how to compute its new value
        updates = OrderedDict()

        # only nodes that define `update` have something to change per step
        updating_nodes = (candidate for candidate in self.nodes.values()
                          if hasattr(candidate, 'update'))
        for node in updating_nodes:
            updates.update(node.update(self.dt))

        # create graph and return optimized update function
        tick = theano.function([], [], updates=updates.items())
        return tick
예제 #13
0
    def make_theano_tick(self):
        """Compile one simulation time step of the network into a theano function.

        Every node exposing an ``update`` attribute contributes the rules
        returned by ``node.update(self.dt)``; the merged rules become the
        ``updates`` of a theano function taking no inputs and giving no outputs.

        :returns: theano function
        """
        # maps each variable to the theano description of how to compute it
        updates = OrderedDict()

        for node in self.nodes.values():
            if not hasattr(node, 'update'):
                # nothing to update for this node
                continue
            node_rules = node.update(self.dt)
            updates.update(node_rules)

        # create graph and return optimized update function
        return theano.function([], [], updates=updates.items())
예제 #14
0
def get_importance(df_train,
                   df_dev,
                   y_name,
                   metric='acc',
                   excluded_vars=None,
                   **kwargs):
    """
    Estimate variable importance by random permutation.

    A baseline score is computed on the unmodified training data.  Then, for
    each candidate variable, that column of the training data is randomly
    permuted, the classifier is re-run and the dev-set score recorded.
    Note that every training column not listed in ``excluded_vars`` is
    permuted, including ``y_name`` itself unless the caller excludes it.

    :param df_train: Training dataset.
    :param df_dev: Dev dataset.
    :param y_name: The name of the y variable.
    :param metric: String. Can be 'acc', 'micro_f1', 'macro_f1', 'precision', 'recall'
    :param excluded_vars: List, or None for no exclusions. These variables are
        not permuted and do not enter the selection. An example of such a
        variable is the ID.
    :param kwargs: Arguments for logistic regression.
    :return: An ordered dict, sorted by ascending score, mapping 'original'
        to the baseline score and each permuted variable name to the score
        obtained with that variable permuted.
    """
    # The default None used to crash in set(None); treat it as "no exclusions".
    excluded = [] if excluded_vars is None else excluded_vars

    df_train_copy = df_train.copy(deep=True)
    df_dev_copy = df_dev.copy(deep=True)

    y_dev = df_dev_copy[y_name]

    vars_to_permute = set(df_train.columns.values).difference(set(excluded))

    y_pred = get_classification_result(df_train_copy, df_dev, y_name,
                                       excluded, **kwargs)

    result = OrderedDict()
    result['original'] = get_metric(y_pred, y_dev, metric)

    for var in tqdm(vars_to_permute):
        # Permute one column on a fresh copy so permutations never accumulate.
        df_train_permute = df_train_copy.copy(deep=True)
        df_train_permute[var] = np.random.permutation(df_train_permute[var])
        y_pred = get_classification_result(df_train_permute, df_dev, y_name,
                                           excluded, **kwargs)
        # Key by the variable's name; the literal 'var' kept only the last score.
        result[var] = get_metric(y_pred, y_dev, metric)

    return OrderedDict(sorted(result.items(), key=operator.itemgetter(1)))
예제 #15
0
def readFastq(fastq):
    """Read a FASTQ file into two ordered name -> string mappings.

    If the first line does not start with '@' the file is handed to
    readFasta instead, and a placeholder quality string is derived for each
    sequence: 'I' for every A/C/G/T (either case), '!' for anything else.

    Otherwise lines are consumed in groups of four: the first word after the
    leading character of line 0 is the record name, line 1 is the sequence,
    line 2 is ignored and line 3 is the quality.  Sequences are upper-cased.
    A repeated name restarts that record.

    :param fastq: path passed to uopen / readFasta.
    :return: the tuple (sequence, qual).
    """
    sequence, qual = OrderedDict(), OrderedDict()

    with uopen(fastq) as fin:
        first_line = fin.readline()
        if not first_line.startswith('@'):
            sequence = readFasta(fastq)
            placeholder_qual = OrderedDict(
                [name, re.sub(r'[^!]', 'I', re.sub(r'[^ACGTacgt]', '!', bases))]
                for name, bases in sequence.items())
            return sequence, placeholder_qual

    with uopen(fastq) as fin:
        for lineId, line in enumerate(fin):
            field = lineId % 4  # position inside the 4-line record
            if field == 0:
                name = line[1:].strip().split()[0]
                sequence[name] = []
                qual[name] = []
            elif field == 1:
                sequence[name].extend(line.strip().split())
            elif field == 3:
                qual[name].extend(line.strip().split())

    # collapse the collected pieces into plain strings
    for name in sequence:
        joined_bases = ''.join(sequence[name])
        sequence[name] = joined_bases.upper()
        qual[name] = ''.join(qual[name])
    return sequence, qual
예제 #16
0
def prepare_data(train, test, eval_sessions_neg_samples):
    """Group click rows into sessions and turn them into train/test sequences.

    Both frames are walked row by row (``SessionId``, ``ItemId`` and ``Time``
    are read from each row).  Clicks are collected per session id into one
    shared mapping, and one timestamp per session is recorded separately for
    train and test.  The per-session data is then handed to ``obtian_tra`` /
    ``obtian_tes_with_neg_samples`` and ``process_seqs`` /
    ``process_seqs_with_neg_samples``, whose results are repackaged.

    NOTE(review): rows are presumably expected to arrive grouped by session,
    since a session boundary is detected only by comparing consecutive
    session ids - confirm against the caller.

    :param train: frame of training clicks; iterated with ``itertuples()``.
    :param test: frame of test clicks; iterated with ``iterrows()``.
    :param eval_sessions_neg_samples: passed through unchanged to
        ``obtian_tes_with_neg_samples``.
    :return: ``(tra, tes, item_dict, reversed_item_dict,
        count_clicks_in_test_items_not_in_train_set)`` where
        ``tra = (tr_seqs, tr_labs)`` and
        ``tes = (te_seqs, te_labs, te_neg_samples)``.
    :raises NameError: if ``train`` or ``test`` has no rows, because the loop
        variable is read after the loop.
    """
    # session id -> list of clicked items; shared by the train and test passes
    sess_clicks = OrderedDict()

    # session id -> timestamp recorded for that training session
    sess_date_tr = OrderedDict()
    ctr = 0  # row counter; incremented below but never read in this function
    curid = -1
    curdate = None
    for tr in train.itertuples():
        # for data in reader:
        sessid = tr.SessionId
        # A new session id means the previous session (curid) is complete.
        if curdate and not curid == sessid:
            # date = ''
            # if opt.dataset == 'yoochoose':
            #     date = time.mktime(time.strptime(curdate[:19], '%Y-%m-%dT%H:%M:%S'))
            # else:
            #     date = time.mktime(time.strptime(curdate, '%Y-%m-%d'))
            # NOTE(review): this stores the time of the CURRENT row (the first
            # click of the next session) for the finished session, whereas the
            # commented-out code above used curdate, the previous row's time.
            # Confirm which is intended.
            date = tr.Time  # SARA
            sess_date_tr[curid] = date
        curid = sessid
        # if opt.dataset == 'yoochoose':
        #     item = data['item_id']
        # else:
        #     item = data['item_id'], int(data['timeframe']) todo: timeframe ?!
        # Train item ids are cast to int; the test loop below does not cast.
        item = int(tr.ItemId)  # SARA
        # curdate = ''
        # if opt.dataset == 'yoochoose':
        #     curdate = data['timestamp']
        # else:
        #     curdate = data['eventdate']
        curdate = tr.Time  # SARA
        if sessid in sess_clicks:
            sess_clicks[sessid] += [item]
        else:
            sess_clicks[sessid] = [item]
        ctr += 1
    # date = ''
    # if opt.dataset == 'yoochoose':
    #     date = time.mktime(time.strptime(curdate[:19], '%Y-%m-%dT%H:%M:%S'))
    # else:
    #     date = time.mktime(time.strptime(curdate, '%Y-%m-%d'))
    #     for i in list(sess_clicks):   #this use digenetica 'timeframe' to sort items of each session. sess_clicks will be list of item_id s which are sorted based on their timeframe
    #         sorted_clicks = sorted(sess_clicks[i], key=operator.itemgetter(1))
    #         sess_clicks[i] = [c[0] for c in sorted_clicks]
    # sess_date[curid] = date
    # The last session has no following row, so it gets the last row's time.
    sess_date_tr[curid] = tr.Time

    # Same bookkeeping for the test rows; clicks go into the same sess_clicks.
    sess_date_ts = OrderedDict()
    ctr = 0
    curid = -1
    curdate = None
    for index, ts in test.iterrows():
        sessid = ts.SessionId
        if curdate and not curid == sessid:
            date = ts.Time
            sess_date_ts[curid] = date
        curid = sessid
        item = ts.ItemId
        curdate = ts.Time
        if sessid in sess_clicks:
            sess_clicks[sessid] += [item]
        else:
            sess_clicks[sessid] = [item]
        ctr += 1
    sess_date_ts[curid] = ts.Time

    # Lists of (session_id, timestamp) pairs in insertion order.
    tra_sess = list(sess_date_tr.items())
    tes_sess = list(sess_date_ts.items())

    # Sort sessions by date MALTE: not needed
    #     tra_sess = sorted(tra_sess, key=operator.itemgetter(1))  # [(session_id, timestamp), (), ]
    #     tes_sess = sorted(tes_sess, key=operator.itemgetter(1))  # [(session_id, timestamp), (), ]

    # Choosing item count >=5 gives approximately the same number of items as reported in paper
    tra_ids, tra_dates, tra_seqs, item_dict, reversed_item_dict = obtian_tra(
        tra_sess, sess_clicks)

    #tes_ids, tes_dates, tes_seqs = obtian_tes(tes_sess, sess_clicks, item_dict)
    tes_ids, tes_dates, tes_seqs, tes_neg_samples_ids, count_clicks_in_test_items_not_in_train_set = obtian_tes_with_neg_samples(
        tes_sess, sess_clicks, item_dict, eval_sessions_neg_samples)

    tr_seqs, tr_dates, tr_labs, tr_ids = process_seqs(tra_seqs, tra_dates)
    #te_seqs, te_dates, te_labs, te_ids = process_seqs(tes_seqs, tes_dates)
    te_seqs, te_dates, te_labs, te_ids, te_neg_samples = process_seqs_with_neg_samples(
        tes_seqs, tes_dates, tes_neg_samples_ids)

    #print("te_seqs", te_seqs[:5])
    #print("te_labs", te_labs[:5])
    #print("te_neg_samples", te_neg_samples[:5])

    # Only sequences and labels (plus negative samples for test) are returned;
    # the dates and ids computed above are not part of the result.
    tra = (tr_seqs, tr_labs)
    tes = (te_seqs, te_labs, te_neg_samples)

    return tra, tes, item_dict, reversed_item_dict, count_clicks_in_test_items_not_in_train_set
예제 #17
0
파일: orderdict.py 프로젝트: TitanVA/Metiz
# Import from the public module; _collections is a private CPython
# implementation detail.
from collections import OrderedDict

# Person -> favourite language, kept in insertion order.
favorite_languages = OrderedDict()
favorite_languages['jen'] = 'python'
favorite_languages['sarah'] = 'c'
favorite_languages['edward'] = 'ruby'
favorite_languages['phil'] = 'python'

# Print one sentence per person, in the order the entries were added.
for name, language in favorite_languages.items():
    print(f"{name.title()}'s favorite language is {language.title()}.")
예제 #18
0
class App:
    """Tkinter GUI for decoding a message enciphered with per-column Caesar shifts.

    The source message is split column-wise into ``block_size`` columns.  For
    each column the most frequent symbol is found and the column is shifted
    so that this symbol becomes the first alphabet symbol (the space).  The
    shift of a column can afterwards be corrected by hand: click the column
    on the left canvas, then move the scale.
    """

    # Cipher alphabet: the space followed by the 32 lower-case Russian
    # letters used by this program.
    ALPHABET = " абвгдежзийклмнопрстуфхцчшщъыьэюя"

    def __init__(self):
        self.root = Tk()
        self.block_size = IntVar()
        self.current_column_step = IntVar()
        self.block_size.set(3)
        self.isDecode = False
        # Index of the column last clicked on the source canvas.  None until
        # the first click, so the scale callback cannot hit a missing attribute.
        self.marked = None

    def update_block_size(self):
        """Store the spinbox value as the new block size."""
        self.block_size.set(self.spin.get())

    def markArea(self, event):
        """Highlight the column that was clicked and show its frequency table."""
        # Only meaningful once a table has been drawn.
        if not (self.isDecode and event.x > 30):
            return
        block = self.block_size.get()
        if block < 1:
            # A zero block size would divide by zero below.
            return
        self.firstCanvas.delete("hello")
        # Work out which column the click landed in.
        column_size = 270 // block
        our_column = event.x // column_size
        if our_column >= len(self.super_dicts):
            # Click to the right of the last analysed column: nothing to mark.
            return
        # Outline the column.
        self.firstCanvas.create_rectangle(40 + our_column * column_size,
                                          25,
                                          column_size +
                                          column_size * our_column + 20,
                                          30 + 15 * len(self.table[0]),
                                          outline="#ff0000",
                                          tags="hello")
        self.marked = our_column
        # Show the frequency analysis computed for that column.
        self.draw_data(self.marked)
        if our_column < block:
            # Mirror the column's current shift on the scale.
            self.current_column_step.set(self.steps[self.marked])

    def peredecode(self, event):
        """Scale callback: apply the scale value as the marked column's shift and redraw."""
        if not self.isDecode or self.marked is None:
            return
        if self.marked < self.block_size.get() and self.marked < len(self.steps):
            self.steps[self.marked] = self.current_column_step.get()
            # Redraw with the corrected shift.
            self.create_message(self.steps, self.table)

    def draw_data(self, item):
        """Draw the symbol counts of column ``item`` on the frequency canvas."""
        self.fq.delete("all")
        column = 0
        j = 0
        for letter, count in self.super_dicts[item].items():
            # Start a new text column once the current one is full.
            if 10 + 15 * j > 350:
                column += 1
                j = 0
            self.fq.create_text(20 + 30 * column,
                                10 + 15 * j,
                                text=letter + ": " + str(count))
            j += 1

    def makeWidgets(self):
        """Create and lay out all widgets of the window."""
        self.firstFrame = Frame(self.root)
        self.firstFrame.pack(side=LEFT, anchor="nw", padx=10, pady=10)
        Label(self.firstFrame, text="Таблица для исходного сообщения").pack()
        self.firstAlterFrame = Frame(self.firstFrame)
        self.firstAlterFrame.pack()
        self.firstCanvas = Canvas(self.firstAlterFrame,
                                  width=300,
                                  height=500,
                                  bg="#ffffff")
        self.firstCanvas.bind("<Button-1>", self.markArea)
        self.firstCanvas.pack(side=LEFT)
        self.firstCanvasScroll = Scrollbar(self.firstAlterFrame,
                                           orient='vertical',
                                           command=self.firstCanvas.yview)
        self.firstCanvasScroll.pack(side=RIGHT, fill=Y, expand=True)
        self.firstCanvas.configure(yscrollcommand=self.firstCanvasScroll.set)
        Label(self.firstFrame, text="Введите исходное сообщение").pack(pady=10)
        self.firstText = Text(self.firstFrame, width=37, height=10)
        self.firstText.pack()

        self.secondFrame = Frame(self.root)
        self.secondFrame.pack(side=LEFT, anchor="n", padx=10, pady=10)
        Label(self.secondFrame, text="Таблица для конечного сообщения").pack()
        self.secondAlterFrame = Frame(self.secondFrame)
        self.secondAlterFrame.pack()
        self.secondCanvas = Canvas(self.secondAlterFrame,
                                   width=300,
                                   height=500,
                                   bg="#ffffff")
        self.secondCanvas.pack(side=LEFT)
        self.secondCanvasScroll = Scrollbar(self.secondAlterFrame,
                                            orient='vertical',
                                            command=self.secondCanvas.yview)
        self.secondCanvasScroll.pack(side=RIGHT, fill=Y, expand=True)
        self.secondCanvas.configure(yscrollcommand=self.secondCanvasScroll.set)
        Label(self.secondFrame, text="Конечный результат").pack(pady=10)
        self.secondText = Text(self.secondFrame, width=37, height=10)
        self.secondText.pack()

        self.lastFrame = Frame(self.root)
        self.lastFrame.pack(side=LEFT, anchor="n", padx=10, pady=10)
        Label(self.lastFrame, text="Панель управления").pack()
        Label(self.lastFrame, text="Размер блока").pack(pady=10)
        self.spin = Spinbox(self.lastFrame,
                            from_=0,
                            textvariable=self.block_size,
                            to=10,
                            command=self.update_block_size)
        self.spin.pack()
        Label(self.lastFrame, text="Частотный анализ").pack(pady=10)
        self.fq = Canvas(self.lastFrame, width=150, height=400, bg="#ffffff")
        self.fq.pack()
        # The scale used for manual shift correction.
        Label(self.lastFrame, text="Ручная корректировка").pack(pady=10)
        self.scale = Scale(self.lastFrame,
                           from_=0,
                           to=33,
                           length=150,
                           tickinterval=6,
                           orient='horizontal',
                           showvalue=YES,
                           variable=self.current_column_step,
                           command=self.peredecode)
        self.scale.pack()
        Button(self.lastFrame,
               text="Расшифровать!",
               width=15,
               height=5,
               command=self.decode).pack(pady=20)

    def start(self):
        """Build the GUI and enter the Tk main loop."""
        self.root.title("Лабораторная работа номер 2")
        # self.root.geometry("900x500")
        self.makeWidgets()
        self.root.mainloop()

    def decode(self):
        """Split the source message into columns, analyse them and show the decoded text."""
        block = self.block_size.get()
        if block < 1:
            # The spinbox allows 0, which gives no columns to work with.
            return
        self.isDecode = True
        self.super_dicts = []
        # Distribute the message over the columns, one symbol at a time.
        self.table = ['' for _ in range(block)]
        message = self.firstText.get("1.0", 'end-1c')
        for i, letter in enumerate(message):
            self.table[i % block] += letter
        self.draw(self.firstCanvas, self.table)
        # One shift per column; analis() also stores each column's frequency
        # table in self.super_dicts.
        self.steps = [self.analis(column) for column in self.table]
        # Shift every column back and rebuild the message.
        self.create_message(self.steps, self.table)

    def create_message(self, steps, table):
        """Shift every column back by its step, draw the result and rebuild the message.

        :param steps: one shift per column of ``table``.
        :param table: list of column strings.
        """
        size = len(App.ALPHABET)
        self.alterTable = [
            App.caesar(column, size - step)
            for column, step in zip(table, steps)
        ]
        self.draw(self.secondCanvas, self.alterTable)
        pieces = []
        # Read the columns row by row to restore the original symbol order.
        for i in range(len(self.alterTable[0])):
            for j in range(self.block_size.get()):
                try:
                    pieces.append(self.alterTable[j][i])
                except IndexError:
                    # Columns shorter than the first one contribute nothing.
                    pass
        self.secondText.delete('1.0', "end")
        self.secondText.insert('1.0', ''.join(pieces))

    @staticmethod
    def caesar(message, step):
        """Shift every alphabet symbol of ``message`` forward by ``step`` positions.

        The shift wraps around the alphabet, so any integer step (negative or
        larger than the alphabet) is valid.  Symbols outside the alphabet are
        copied unchanged.

        :param message: text to shift.
        :param step: number of alphabet positions to shift by.
        :return: the shifted text.
        """
        alphabet = App.ALPHABET
        size = len(alphabet)
        shifted = {
            letter: alphabet[(idx + step) % size]
            for idx, letter in enumerate(alphabet)
        }
        return ''.join(shifted.get(letter, letter) for letter in message)

    def analis(self, column):
        """Count symbol frequencies of ``column`` and return its estimated shift.

        The counts, ordered from most to least frequent, are stored in
        ``self.dicts`` and appended to ``self.super_dicts``.  Symbols outside
        the alphabet are ignored.

        :param column: string holding one column of the message.
        :return: alphabet index of the most frequent symbol.
        """
        alphabet = App.ALPHABET
        self.dicts = {letter: 0 for letter in alphabet}
        for symbol in column:
            # Upper-case letters, punctuation, newlines etc. are not counted.
            if symbol in self.dicts:
                self.dicts[symbol] += 1
        self.dicts = OrderedDict(
            sorted(self.dicts.items(), key=lambda t: -t[1]))
        self.super_dicts.append(self.dicts)
        most_frequent = next(iter(self.dicts))
        return alphabet.index(most_frequent)

    def draw(self, obj, table):
        """Fill canvas ``obj`` with the numbered columns of ``table``."""
        obj.delete("all")
        obj.create_line(30, 0, 30, 30 + len(table[0]) * 15, fill="#000000")
        obj.create_line(30, 20, 300, 20, fill="#000000")
        # Share the remaining width between the columns.
        column_size = 270 // self.block_size.get()
        # Column numbers on top, the column's symbols underneath.
        for i in range(self.block_size.get()):
            obj.create_text(column_size / 1.2 + column_size * i,
                            10,
                            text=str(i + 1))
            for (j, letter) in enumerate(table[i]):
                obj.create_text(column_size / 1.2 + column_size * i,
                                30 + j * 15,
                                text=letter)
        # Row numbers down the left edge.
        for i in range(len(table[0])):
            obj.create_text(10, 30 + i * 15, text=str(i + 1))
        obj.configure(scrollregion=obj.bbox("all"))
# Import from the public module; _collections is a private CPython
# implementation detail.
from collections import OrderedDict

favorite_languages = OrderedDict()

favorite_languages['jen'] = 'python'
favorite_languages['sarah'] = 'c'
favorite_languages['edward'] = 'ruby'
favorite_languages['phil'] = 'python'

# An OrderedDict remembers insertion order, so the entries are printed in the
# order they were added above.
for name, language in favorite_languages.items():
    print(name.title() + "'s favorite language is " + language.title())

# The entries can still be visited in sorted (alphabetical by name) order.
for name, language in sorted(favorite_languages.items()):
    print(name.title() + "'s favorite language is " + language.title())
예제 #20
0
# Import from the public module; _collections is a private CPython
# implementation detail.
from collections import OrderedDict
import operator

# Read "<course> : <student>" lines from standard input until a line that is
# exactly "end", collecting the students of every course.
command = input()
courses = dict()
while command != "end":
    fields = command.split(' : ')
    course = fields[0]
    student = fields[1]
    courses.setdefault(course, []).append(student)
    command = input()

# Courses with the most students first; ties keep their input order.
ordered = OrderedDict(
    sorted(courses.items(), key=lambda c: len(c[1]), reverse=True))
for k, v in ordered.items():
    print(f"{k}: {len(v)}")
    # Students are listed alphabetically under their course.
    for value in sorted(v):
        print(f"-- {value}")
예제 #21
0
    def _report_textual_results(self, tally_coll, res_dir):
        '''
        Given a collection of tallies with results
        from a series of batches, create long
        outputs and inputs lists from all testing-phase tallies.

        Computes information retrieval type values:
             precision (macro/micro/weighted/by-class)
             recall    (macro/micro/weighted/by-class)
             f1        (macro/micro/weighted/by-class)
             accuracy
             balanced_accuracy

        Combines these results into a Pandas series,
        and writes them to a csv file. That file is constructed
        from the passed-in res_dir, appended with 'ir_results.csv'.

        Finally, constructs Github flavored tables from the
        above results (rounded to one digit), and posts them
        to the 'text' tab of tensorboard.

        Returns the (unrounded) results measures Series

        :param tally_coll: collection of tallies from batches
        :type tally_coll: ResultCollection
        :param res_dir: directory where all .csv and other
            result files are to be written
        :type res_dir: str
        :return results of information retrieval-like measures
        :rtype: pandas.Series
        '''

        all_preds = []
        all_labels = []

        for tally in tally_coll.tallies(phase=LearningPhase.TESTING):
            all_preds.extend(tally.preds)
            all_labels.extend(tally.labels)

        # Result-key prefix paired with the scoring function behind it:
        scorers = (('prec', precision_score),
                   ('recall', recall_score),
                   ('f1', f1_score))
        # Result-key suffix paired with the 'average' argument;
        # average=None yields one value per class:
        averages = (('macro', 'macro'),
                    ('micro', 'micro'),
                    ('weighted', 'weighted'),
                    ('by_class', None))

        # Keys are inserted as prec_*, recall_*, f1_*, then the
        # two accuracies; that order determines the row order
        # of the .csv file:
        res = OrderedDict()
        for prefix, scorer in scorers:
            for suffix, average in averages:
                res[prefix + '_' + suffix] = scorer(all_labels,
                                                    all_preds,
                                                    average=average,
                                                    zero_division=0)

        res['accuracy'] = accuracy_score(all_labels, all_preds)
        res['balanced_accuracy'] = balanced_accuracy_score(
            all_labels, all_preds)

        res_series = pd.Series(list(res.values()), index=list(res.keys()))

        # Write information retrieval type results
        # to a .csv file, using pandas Series
        # as convenient intermediary:
        res_csv_path = os.path.join(res_dir, 'ir_results.csv')
        res_series.to_csv(res_csv_path)

        res_rnd = {}
        for meas_nm, meas_val in res.items():

            # Measure results are either scalars (precision, recall, etc.),
            # or np arrays (e.g. precision-per-class). For both
            # cases, round each measure to one digit. isinstance()
            # rather than an exact type comparison, so that float
            # subclasses and plain ints take the scalar branch
            # instead of failing on a missing .round() method:

            if isinstance(meas_val, (int, float)):
                res_rnd[meas_nm] = round(meas_val, 1)
            else:
                res_rnd[meas_nm] = meas_val.round(1)

        ir_measures_skel = {
            'col_header': ['precision', 'recall', 'f1'],
            'row_labels': ['macro', 'micro', 'weighted'],
            'rows': [[res_rnd['prec_' + avg],
                      res_rnd['recall_' + avg],
                      res_rnd['f1_' + avg]]
                     for avg in ('macro', 'micro', 'weighted')]
        }

        # One [precision, recall, f1] row per class:
        ir_per_class_rows = [[
            prec_class, recall_class, f1_class
        ] for prec_class, recall_class, f1_class in zip(
            res_rnd['prec_by_class'], res_rnd['recall_by_class'],
            res_rnd['f1_by_class'])]
        ir_by_class_skel = {
            'col_header': ['precision', 'recall', 'f1'],
            'row_labels': self.class_names,
            'rows': ir_per_class_rows
        }

        accuracy_skel = {
            'col_header': ['accuracy', 'balanced_accuracy'],
            'row_labels': ['Overall'],
            'rows': [[res_rnd['accuracy'], res_rnd['balanced_accuracy']]]
        }

        ir_measures_tbl = GithubTableMaker.make_table(ir_measures_skel,
                                                      sep_lines=False)
        ir_by_class_tbl = GithubTableMaker.make_table(ir_by_class_skel,
                                                      sep_lines=False)
        accuracy_tbl = GithubTableMaker.make_table(accuracy_skel,
                                                   sep_lines=False)

        # Write the markup tables to Tensorboard:
        self.writer.add_text('Information retrieval measures',
                             ir_measures_tbl,
                             global_step=0)
        self.writer.add_text('Per class measures',
                             ir_by_class_tbl,
                             global_step=0)
        self.writer.add_text('Accuracy', accuracy_tbl, global_step=0)

        return res_series
        texto = miTexto.read().lower() #leemos y guardamos el archivo en texto, ademas de pasar todo a minisculas
        result = re.sub(r'[^\w\s]','',texto) #eliminamos las comillas simples, comillas dobles, parentesis, etc.

    stop_words = set(stopwords.words('spanish')) #indicamos una "lista" de pronombres en español
    word_tokens = word_tokenize(result) #separamos las cadenas del texto en palabras

    word_tokens = list(filter(lambda token: token not in  string.punctuation,word_tokens)) #eliminamos los signos de puntuacion

    filtro = [] #arreglo que nos servira como filtro más adelante

    for palabra in word_tokens: #verificamos cada palabra que hay en la lista sin singnos de puntuacion
        if palabra not in stop_words: #verificamos si la palabra no esta en la lista de pronombres
            filtro.append(palabra) #si no esta, la añadimos a arreglo filtro

    c=Counter(filtro) #contamos las palabras que se repitan en el arreglo de filtro
    val = input("Ingrese el numero de palabras que desee que se muestren: ") #pedimos cuantas palabras repetidas quiere el usuario que se muestren
    try: #try para validar que ingrese un numero y no una cadena
        num = int(val) #convertimos el input de string a entero
        y=OrderedDict(c.most_common(num)) #le indicamos que las ordene de las más repetidas a las que menos se repitan, solo se mostraran el numero de palabras que se haya ingresado anteriormente
        print(y) #imprimimos en consola las palabras 

        with open('revision.txt','w',encoding="utf-8") as file: #creamos/reescribimos un archivo llamado revision para escribir las palabras
            for k,v in y.items(): #recorremos y que son las palabras mas comunes, "k" es la palabra y "v" es el numero que se repiten
                #no estamos imprimiendo el numero que se repiten, solo las palabras, por eso abajo solo se imprime "k"
                file.write(k + "\n")#en el archivo escribimos las palabras que hay en "y" con un salto de linea 
    except ValueError: #si el numero de palabras a mostrar no es un numero muestra el mensaje y termina el programa
        print("La entrada no es un numero entero.") #imprimimos el mensaje
    
else: #si no hay un archvio con el nombre ingresado en la ruta, se muestra el mensaje y termina el programa
    print("No existe el archivo en la ruta.")#imprimimos el mensaje.
예제 #23
0
# Collections.OrderedDict()

# OrderedDict comes from the public ``collections`` module rather than the
# private ``_collections`` implementation module.
from collections import OrderedDict

# Accumulated price per item, in order of first appearance.
OD = OrderedDict()

N = int(input())

for _ in range(N):
    # Each line is "<item name words...> <price>": the last
    # whitespace-separated token is the price and everything before it is
    # the item name. Splitting by position (instead of testing isalpha() on
    # every token) keeps names that contain digits or punctuation intact.
    *name_parts, price = input().split()
    item = ' '.join(name_parts)

    OD[item] = OD.get(item, 0) + int(price)

for item, total in OD.items():
    print(item, total)
예제 #24
0
    "Gibrael": {
        "age": 10,
        "address": "Sesimbra"
    },
    "Susan": {
        "age": 11,
        "address": "Lisbon"
    },
    "Charles": {
        "age": 9,
        "address": "Sesimbra"
    },
}

# items() yields (key, value) pairs, so both can be unpacked in the loop header
for k, v in dict1.items():
    print(k, v)

# values() iterates over the values only
for v in dict2.values():
    print(v)

# iterating over a dictionary directly yields its keys
for k in dict3:
    print(k)

# to iterate over a nested dictionary, remember that each value of the
# parent dictionary is itself a dictionary whose key/value pairs must be
# extracted in turn
# NOTE(review): the loop below only prints the outer keys; v1 is not used here

for k1, v1 in dict3.items():
    print(k1)
예제 #25
0
# NOTE(review): `d` is created above the visible part of this snippet —
# presumably a plain dict; confirm.
d['d'] = 4
d['e'] = 5

print(d)

for k, v in d.items():
    print(k, v)

# NOTE(review): the name `dict` shadows the builtin type from here on.
# This OrderedDict receives its keys in the order a, d, e, b, c.
dict = OrderedDict()
dict['a'] = 1
dict['d'] = 4
dict['e'] = 5
dict['b'] = 2
dict['c'] = 3

for k, v in dict.items():
    print(k, v)

# Compare `d` with the OrderedDict and report the outcome.
if d == dict:
    print('Equals')
else:
    print('Different')

# A second OrderedDict with the same key/value pairs as `dict`, inserted in
# a different order (a, d, b, e, c).
dict2 = OrderedDict()
dict2['a'] = 1
dict2['d'] = 4
dict2['b'] = 2
dict2['e'] = 5
dict2['c'] = 3

if dict2 == dict:
# Given a log file of IP addresses, analyse the last N addresses and
# save "ip address - request count" pairs to a new file.

# Imported from the public ``collections`` module rather than the private
# ``_collections`` implementation module.
from collections import OrderedDict, defaultdict, deque

N = 3000
with open('big_log.txt', 'r', encoding='utf-8') as f:
    # A deque bounded to N entries keeps only the last N lines of the file.
    log = deque(f, N)

print(log)

data = OrderedDict()
spam = defaultdict(int)

for item in log:
    # Strip only the line terminator: slicing off the last character would
    # truncate the final address when the file has no trailing newline.
    ip = item.rstrip('\n')

    # Addresses starting with 192.168 are left out of the statistics.
    if not ip.startswith('192.168'):
        spam[ip] += 1
        data[ip] = 1

print(spam)
print(data)

# Replace the placeholder 1s with the real request counts.
data.update(spam)
print(data)

with open('data.txt', 'w', encoding='utf-8') as f:
    # items() is called once; calling its result again raised TypeError.
    for key, value in data.items():
        f.write(f'{key} - {value}\n')
            target_drop = rel_drop[i]
            rel_drop[i] = 0

            trackers[cls].add(min(np.min(rel_drop),0)/target_drop * 100)

    return [trackers[k].get() for k in cifar10_classes]

# Display label -> name passed to lib.get_runs(), in insertion order.
runs = OrderedDict([
    ("Simple", "cifar10"),
    ("No dropout", "cifar10_no_dropout"),
    ("Resnet 110", "cifar10_resnet"),
])

# Per-label results, filled in by the loop below.
rel_drops, worsts = {}, {}

# For every configured entry: look up its runs, download them, then store
# the relative-drop and worst-case statistics under the display label.
for k, v in runs.items():
    r = lib.get_runs([v])
    print("Downloading", k)
    download(v, r)
    rel_drops[k], worsts[k] = get_relative_drop(v, r), get_worst(v, r)

def draw_stats(stats, shape):
    fig = plt.figure(figsize=shape)
    for i, k in enumerate(runs.keys()):
        stat = stats[k]
        plt.bar([x * (len(runs) + 1) + i for x in range(len(stat))], [s.mean for s in stat],
                yerr=[s.std for s in stat], align='center')

    plt.xticks([(len(runs) + 1) * x + (len(runs) - 1) / 2 for x in range(len(cifar10_classes))], cifar10_classes)
    for tick in fig.axes[0].xaxis.get_major_ticks()[1::2]:
예제 #28
0
def importSeqFiles(request, prj_pk):
    """Parse an uploaded comma-separated sheet of sequencing files.

    The upload is read from ``request.FILES['excel_file']`` and decoded as
    UTF-8. The first line is the header; it locates the "Experiment Name",
    "Sequencing Run Name", "File Path", "md5sum" and "sha256sum" columns.
    Every following line produces an entry keyed by that line's file-path
    cell, whose value is ``[experiment, run]`` followed by the checksums
    present on the line. Empty experiment/run/path cells fall back to the
    most recent non-empty value seen on an earlier line.

    Entries whose path is already stored for the project are moved from the
    run mapping into a duplicates mapping. Parsing stops with a
    ``messages.error`` at the first line for which no experiment name or no
    file path is available.

    :param request: request carrying the uploaded file.
    :param prj_pk: primary key used to look up the ``Project``.
    :return: the tuple ``(request, template_name, context)``.
    """
    template_name = 'importFiles.html'
    uploaded = request.FILES['excel_file']
    lines = uploaded.read().decode("utf-8").rstrip().split("\n")
    project = Project.objects.get(pk=prj_pk)
    exsisting_files = SeqencingFile.objects.filter(
        project=project).values_list('sequencingFile_mainPath', flat=True)
    runDict = OrderedDict()
    dupDict = OrderedDict()
    orderList = []
    context = {}

    def split_cells(raw_line):
        # Drop a trailing carriage return, then split on commas.
        return raw_line.rstrip("\r").split(",")

    # Header title -> label appended to orderList when the column is present
    # (the file-path column contributes no label).
    recognised = (
        ("Experiment Name", "Experiment"),
        ("Sequencing Run Name", "Run"),
        ("File Path", None),
        ("md5sum", "md5sum"),
        ("sha256sum", "sha256sum"),
    )
    header = split_cells(lines[0])
    column_of = {}
    for title, label in recognised:
        if title not in header:
            column_of[title] = None
            continue
        column_of[title] = header.index(title)
        if label is not None:
            orderList.append(label)

    exp_col = column_of["Experiment Name"]
    run_col = column_of["Sequencing Run Name"]
    path_col = column_of["File Path"]
    md5_col = column_of["md5sum"]
    sha_col = column_of["sha256sum"]

    runName = expName = path = ""
    for raw_line in lines[1:]:
        v = split_cells(raw_line)

        # Non-empty cells replace the values carried over from earlier lines.
        if run_col is not None and v[run_col] != "":
            runName = v[run_col]
        if exp_col is not None and v[exp_col] != "":
            expName = v[exp_col]
        if path_col is not None and v[path_col] != "":
            path = v[path_col]

        if expName == "" or path == "":
            messages.error(
                request, "something is not correct with your input file.")
            break

        # The entry is keyed by this line's own path cell, while the
        # duplicate check further down uses the carried-over ``path``.
        entry = [expName, runName]
        runDict[v[path_col]] = entry

        # The context flags reflect the most recently processed line.
        has_md5 = md5_col is not None and v[md5_col] != ""
        if has_md5:
            entry.append(v[md5_col])
        context['md5sum'] = has_md5

        has_sha256 = sha_col is not None and v[sha_col] != ""
        if has_sha256:
            entry.append(v[sha_col])
        context['sha256sum'] = has_sha256

        if path in exsisting_files:
            dupDict[path] = runDict[path]
            del runDict[path]

    # Discard an entry keyed by a single blank, if one was collected.
    runDict.pop(" ", None)

    context['runDict'] = sorted(runDict.items())
    context['dupDict'] = sorted(dupDict.items())
    context['orderList'] = orderList
    context['project'] = project

    # NOTE(review): the raw tuple is returned as-is; presumably the caller
    # renders it — confirm.
    return (request, template_name, context)
예제 #29
0
class Transect(sm.CustomObject):
    """A line between a start and an end coordinate that holds locations.

    Locations (``Loc`` objects) are stored in an ``OrderedDict`` keyed by
    their distance ``x`` along the transect. The mapping is re-sorted by
    distance whenever a location is added through ``add_loc`` or
    ``add_loc_by_coords``.
    """
    base_type = "transect"
    type = "transect"
    datum = "top of face"

    def __init__(self, **kwargs):
        """Create a transect.

        Recognised keyword arguments: ``name`` (default ``None``), ``start``
        and ``end`` as ``(lat, long)`` pairs (default ``(0, 0)``), ``h_face``
        and ``av_ground_slope`` (default ``None``).
        """
        super(Transect, self).__init__()
        self._locs = OrderedDict()
        self.name = kwargs.get("name", None)
        start = kwargs.get("start", (0, 0))  # coords (lat, long)
        end = kwargs.get("end", (0, 0))
        self.s_coords = Coords(lat=start[0], lon=start[1])
        self.e_coords = Coords(lat=end[0], lon=end[1])
        self.ug_values = []
        self.ug_xs = []
        self.h_face = kwargs.get("h_face", None)
        self.av_ground_slope = kwargs.get("av_ground_slope", None)
        # Attribute names appended to the inherited ``inputs`` list.
        self._extra_class_inputs = [
            "locs", "start", "end", "ug_values", "ug_xs", "h_face",
            "av_ground_slope", "datum"
        ]
        self.inputs = self.inputs + self._extra_class_inputs

    def add_cpt_by_coords(self, cpt, coords, **kwargs):
        """Wrap ``cpt`` in a ``Loc`` and place it using its coordinates.

        :param cpt: CPT object; its ``file_name`` becomes the loc name.
        :param coords: coordinates of the CPT.
        :param esp: optional keyword forwarded to ``Loc`` (default ``None``).
        :return: the stored ``Loc``.
        """
        esp = kwargs.get("esp", None)
        loc = Loc(cpt=cpt, name=cpt.file_name, esp=esp)
        loc.coords = coords
        return self.add_loc_by_coords(coords, loc)

    def add_cpt(self, cpt, x, **kwargs):
        """Wrap ``cpt`` in a ``Loc`` and place it at distance ``x``.

        Optional keywords ``offset`` (default ``None``), ``off_dir``
        (default ``"-"``) and ``esp`` (default ``None``) are forwarded to
        ``Loc``.

        :return: the stored ``Loc``.
        """
        offset = kwargs.get("offset", None)
        off_dir = kwargs.get("off_dir", "-")
        esp = kwargs.get("esp", None)
        loc = Loc(cpt=cpt,
                  name=cpt.file_name,
                  offset=offset,
                  off_dir=off_dir,
                  esp=esp)
        return self.add_loc(x, loc)

    def get_cpt_names(self):
        """Return the ``cpt_file_name`` of every loc, in stored order."""
        return [self.locs[x].cpt_file_name for x in self.locs]

    def set_ids(self):
        """Assign 1-based ids to the locs (and their soil profiles, if any)."""
        for i, loc_name in enumerate(self.locs):
            self.locs[loc_name].id = i + 1
            if self.locs[loc_name].soil_profile is not None:
                self.locs[loc_name].soil_profile.id = i + 1

    def to_dict(self, extra=(), **kwargs):
        """Serialise the input attributes (except ``locs``) to an OrderedDict.

        :param extra: additional attribute names to include.
        :return: mapping of attribute name to its serialised value.
        """
        outputs = OrderedDict()
        skip_list = ["locs"]
        if hasattr(self, "inputs"):
            full_inputs = list(self.inputs) + list(extra)
        else:
            full_inputs = list(extra)
        for item in full_inputs:
            if item not in skip_list:
                value = self.__getattribute__(item)
                outputs[item] = sf.collect_serial_value(value)
        return outputs

    def add_to_dict(self, models_dict, **kwargs):
        """Store this transect, then each of its locs, in ``models_dict``.

        The transect is stored under
        ``models_dict[base_type][unique_hash]``; that entry is passed to
        every loc as ``parent_dict``.
        """
        if self.base_type not in models_dict:
            models_dict[self.base_type] = OrderedDict()
        outputs = self.to_dict(**kwargs)
        models_dict[self.base_type][self.unique_hash] = outputs
        for loc_num in self.locs:
            self.locs[loc_num].add_to_dict(
                models_dict,
                parent_dict=models_dict[self.base_type][self.unique_hash])

    def reset_cpt_folder_paths(self, folder_path):
        """Set ``cpt_folder_path`` of every loc to ``folder_path``."""
        for loc_name in self.locs:
            self.locs[loc_name].cpt_folder_path = folder_path

    @property
    def tran_line(self):
        """``Line`` from start to end, or ``None`` if the import fails."""
        try:
            from liquepy.spatial.map_coords import Line
            return Line(self.s_coords, self.e_coords)
        except ImportError as e:
            warnings.warn('Need to import spatial packages', stacklevel=3)
            # Pass the message text: warnings.warn expects a string or a
            # Warning instance, not an arbitrary exception object.
            warnings.warn(str(e), stacklevel=3)
            return None

    @property
    def x_end(self):
        """Length of the transect line (``tran_line.dist``)."""
        return self.tran_line.dist

    @property
    def locs(self):
        """The mapping of distance to ``Loc``."""
        return self._locs

    def add_loc(self, x: float, loc):
        """Store ``loc`` at distance ``x`` and re-sort the locs.

        :return: the stored loc.
        """
        loc.x = x
        self._locs[x] = loc
        self._sort_locs()
        return self._locs[x]

    def add_loc_by_coords(self, coords, loc):
        """Project ``coords`` onto the transect line and store ``loc`` there.

        Sets ``loc.x``, ``loc.offset`` and ``loc.off_dir`` from the
        projection.

        :raises ValueError: if start or end is still ``(0, 0)``.
        :return: the stored loc.
        """
        from liquepy.spatial import map_coords
        # A coordinate pair counts as unset only when both components are
        # zero. Testing the sum would also reject valid pairs whose
        # components cancel out, such as (10, -10).
        if not any(self.start) or not any(self.end):
            raise ValueError("start and end coordinates must be set")
        loc.x = map_coords.calc_proj_line_dist(self.tran_line, coords)
        loc.offset = map_coords.calc_line_offset(self.tran_line, coords)
        loc.off_dir = map_coords.calc_line_off_dir(self.tran_line, coords)
        self._locs[loc.x] = loc
        self._sort_locs()
        return self._locs[loc.x]

    @locs.setter
    def locs(self, locs):
        """Rebuild locs from a mapping of loc id to a dict with an ``"x"`` entry."""
        for loc_id in locs:
            loc_dist = locs[loc_id]["x"]
            self.locs[loc_dist] = Loc()
            sm.add_to_obj(self.locs[loc_dist], locs[loc_id])

    def _sort_locs(self):
        """
        Sort the locs by distance.
        :return:
        """
        self._locs = OrderedDict(sorted(self._locs.items(),
                                        key=lambda t: t[0]))

    def get_loc_by_name(self, name):
        """Return the first loc whose ``name`` matches, or ``None``."""
        for x in self.locs:
            if self.locs[x].name == name:
                return self.locs[x]
        return None

    def get_loc_by_dist(self, dist):
        """Return the loc stored at exactly distance ``dist``."""
        return self.locs[dist]

    def _key_at(self, loc_int):
        """Return the ``_locs`` key at the 1-based position ``loc_int``."""
        # Positions are 1-based; 0 would otherwise wrap around to the last
        # loc through Python's negative indexing.
        if loc_int == 0:
            raise KeyError("index=%i, but must be 1 or greater." % loc_int)
        return list(self._locs.keys())[loc_int - 1]

    def loc(self, index):
        """Return the loc at the 1-based position ``index``.

        :raises KeyError: if ``index`` is 0.
        """
        index = int(index)
        if index == 0:
            raise KeyError("index=%i, but must be 1 or greater." % index)
        return list(self._locs.values())[index - 1]

    def remove_loc(self, loc_int):
        """Delete the loc at the 1-based position ``loc_int``.

        :raises KeyError: if ``loc_int`` is 0.
        """
        del self._locs[self._key_at(loc_int)]

    def replace_loc(self, loc_int, soil):
        """Replace the loc at the 1-based position ``loc_int`` with ``soil``.

        :raises KeyError: if ``loc_int`` is 0.
        """
        self._locs[self._key_at(loc_int)] = soil

    @property
    def start(self):
        """Start coordinates as a tuple."""
        return self.s_coords.as_tuple

    @property
    def end(self):
        """End coordinates as a tuple."""
        return self.e_coords.as_tuple

    @start.setter
    def start(self, values):
        self.s_coords = Coords(lat=values[0], lon=values[1])

    @end.setter
    def end(self, values):
        self.e_coords = Coords(lat=values[0], lon=values[1])
예제 #30
0
# OrderedDict comes from the public ``collections`` module rather than the
# private ``_collections`` implementation module.
from collections import OrderedDict

# print("Before:\n")
# od = OrderedDict()
# od['a'] = 1
# od['b'] = 2
# od['c'] = 3
# od['d'] = 4
# for key, value in od.items():
#     print(key, value)
#
# print("\nAfter:\n")
# od['c'] = 5
# for key, value in od.items():
#     print(key, value)

input_string = 'aabbccddef'
ordered_dict = OrderedDict()

# Count every character, keeping the characters in order of first
# appearance. The loop variable is not called `input`, so the builtin
# input() stays usable afterwards.
for char in input_string:
    ordered_dict[char] = ordered_dict.get(char, 0) + 1

# Each character followed by its total count, e.g. 'a2b2...'.
out_string = "".join(
    str(key) + str(value) for key, value in ordered_dict.items())

print(out_string)
# OrderedDict comes from the public ``collections`` module rather than the
# private ``_collections`` implementation module.
from collections import OrderedDict

# Each person's favourite language, stored in insertion order.
favorite_languages = OrderedDict()

favorite_languages['jen'] = 'python'
favorite_languages['sarah'] = 'c'
favorite_languages['edward'] = 'ruby'
favorite_languages['phil'] = 'python'

# Print one line per person, in the order the entries were added.
for nome, linguagen in favorite_languages.items():
    print(f"\t A linguagem favorita de {nome.title()} é {linguagen.title()}.")
            cars[car][1] -= fuel
            print(
                f"{car} driven for {distance} kilometers. {fuel} liters of fuel consumed."
            )
        if cars[car][0] >= 100000:
            print(f"Time to sell the {car}!")
            del cars[car]
    elif todo == "Refuel":
        car = command[1]
        fuel = int(command[2])
        if cars[car][1] + fuel > 75:
            diff = (cars[car][1] + fuel) - 75
            newdiff = fuel - diff
            cars[car][1] += newdiff
            print(f"{car} refueled with {newdiff} liters")
        else:
            cars[car][1] += fuel
            print(f"{car} refueled with {fuel} liters")
    else:
        car = command[1]
        km = int(command[2])
        if cars[car][0] - km <= 10000:
            cars[car][0] = 10000
        else:
            cars[car][0] -= km
            print(f"{car} mileage decreased by {km} kilometers")
    command = input()
# Rank the cars by their stored record (index 0 is printed as mileage,
# index 1 as fuel), highest record first, then report them in that order.
ranked_cars = sorted(cars.items(), key=lambda entry: entry[1], reverse=True)
sorted_cars = OrderedDict(ranked_cars)
for k, v in sorted_cars.items():
    mileage, fuel = v[0], v[1]
    print(f"{k} -> Mileage: {mileage} kms, Fuel in the tank: {fuel} lt.")