Example #1
0
def process_files(volume_number, production_prefix, start_bates_number,
                  num_digits, confidentiality, files_to_convert, dirs, files):
    """Run ``process_file`` over every input file while drawing a progress bar.

    The Bates number returned by each ``process_file`` call is fed into the
    next call, so numbering continues across files.

    Args:
        volume_number: Passed through to ``process_file`` unchanged.
        production_prefix: Passed through to ``process_file`` unchanged.
        start_bates_number: First Bates number; converted with ``int()`` to
            seed the running counter, and also passed through as given.
        num_digits: Converted with ``int()`` before being passed through.
        confidentiality: Passed through to ``process_file`` unchanged.
        files_to_convert: Sized sequence of input files, processed in order.
        dirs: Sequence of exactly nine directory entries.
        files: Sequence of exactly two entries (OPT file, DAT file).
    """
    # The unpacked names are not used below; the unpacking only enforces
    # that ``dirs`` has nine entries and ``files`` has two.
    (prod_home, prod_data, prod_img, prod_nat, prod_txt, prod_img001,
     prod_nat001, prod_txt001, completed_dir) = dirs
    opt_file, dat_file = files

    num_digits = int(num_digits)
    next_bates_number = int(start_bates_number)

    total = len(files_to_convert)
    bar_options = dict(prefix='Progress:', suffix='Complete', length=50)
    progress_bar.printProgressBar(0, total, **bar_options)

    for done, path in enumerate(files_to_convert, start=1):
        next_bates_number = process_file(path, volume_number,
                                         production_prefix,
                                         start_bates_number, num_digits,
                                         confidentiality, next_bates_number,
                                         dirs, files)
        progress_bar.printProgressBar(done, total, **bar_options)
Example #2
0
def get_counts_in_gene_bodies(regions_filename, gene_bodies_output_filename):
    """Sum the 3' counts over each region and write them as a TSV.

    Each line of the regions file is split on whitespace into six fields:
    chromosome, left, right, gene name, an ignored field, and strand.
    For every region, the values stored in the module-level
    ``three_prime_counts_dict`` under ``"<chromosome>_<position>_<strand>"``
    are summed for every position from ``left`` to ``right`` inclusive.
    One ``gene_name<TAB>total`` row is written per region.

    Args:
        regions_filename: Path of the regions file to read.
        gene_bodies_output_filename: Path of the TSV file to (over)write.
    """
    bar_style = dict(prefix='Progress:', suffix='Complete', length=50)

    with open(regions_filename, 'r') as regions, \
            open(gene_bodies_output_filename, 'w') as out_handle:
        writer = csv.writer(out_handle, delimiter='\t', lineterminator='\n')

        # First pass only counts lines so the bar knows its total.
        total_lines = sum(1 for _ in regions)
        regions.seek(0)
        print("Getting gene body counts.")
        printProgressBar(0, total_lines, **bar_style)

        for line_idx, record in enumerate(regions):
            chromosome, left, right, gene_name, _, strand = record.split()

            first_base, last_base = int(left), int(right)
            gene_total = 0
            # Both ends of the region are included.
            for position in range(first_base, last_base + 1):
                key = chromosome + '_' + str(position) + '_' + strand
                if key in three_prime_counts_dict:
                    gene_total += three_prime_counts_dict[key]

            writer.writerow([gene_name, gene_total])
            printProgressBar(line_idx + 1, total_lines, **bar_style)
Example #3
0
def blacklist_regions(blacklisting_file):
    """Zero the 3' counts of every base inside a blacklisted region.

    Each line of the blacklist file is split on whitespace into six fields:
    chromosome, left, right, name, an ignored field, and strand. For every
    position from ``left`` up to but not including ``right``, an existing
    ``"<chromosome>_<position>_<strand>"`` entry in the module-level
    ``three_prime_counts_dict`` is set to 0. Missing entries are not created.

    Args:
        blacklisting_file: Path of the blacklist regions file to read.
    """
    bar_style = dict(prefix='Progress:', suffix='Complete', length=50)

    with open(blacklisting_file, 'r') as regions:
        # First pass only counts lines so the bar knows its total.
        total_lines = sum(1 for _ in regions)
        regions.seek(0)
        print("Blacklisting.")
        printProgressBar(0, total_lines, **bar_style)

        last_index = total_lines - 1
        for line_idx, record in enumerate(regions):
            chromosome, left, right, _, _, strand = record.split()

            # Reset the count of every already-recorded base in the region.
            for position in range(int(left), int(right)):
                key = chromosome + '_' + str(position) + '_' + strand
                if key in three_prime_counts_dict:
                    three_prime_counts_dict[key] = 0

            # Redraw only every 10,000 lines, plus once on the final line.
            at_checkpoint = line_idx % 10_000 == 0
            if at_checkpoint or line_idx == last_index:
                printProgressBar(line_idx + 1, total_lines, **bar_style)
Example #4
0
def make_5_prime_counts_dict(sequencing_filename):
    """Count reads per 5' end position into ``five_prime_counts_dict``.

    Each line of the sequencing file is split on whitespace into six fields:
    chromosome, left, right, two ignored fields, and strand. The 5' position
    is ``left`` for strand ``"+"`` and ``right`` for anything else. The
    module-level ``five_prime_counts_dict`` entry keyed by
    ``"<chromosome>_<position>_<strand>"`` is incremented, starting at 1.

    Args:
        sequencing_filename: Path of the reads file to process.
    """
    bar_style = dict(prefix='Progress:', suffix='Complete', length=50)

    with open(sequencing_filename, 'r') as reads:
        # First pass only counts lines so the bar knows its total.
        total_lines = sum(1 for _ in reads)
        reads.seek(0)
        print("Building the 5' counts dict.")
        printProgressBar(0, total_lines, **bar_style)

        last_index = total_lines - 1
        for line_idx, record in enumerate(reads):
            chromosome, left, right, _, _, strand = record.split()

            five_prime_position = left if strand == "+" else right
            key = chromosome + '_' + five_prime_position + '_' + strand

            # First sighting starts at 1; later sightings add one.
            five_prime_counts_dict[key] = five_prime_counts_dict.get(key, 0) + 1

            # Redraw only every 100,000 lines, plus once on the final line.
            if line_idx % 100_000 == 0 or line_idx == last_index:
                printProgressBar(line_idx + 1, total_lines, **bar_style)
def video_processing(filename, output_video=None):
    """Locate bacteria in each frame of a video and collect their coordinates.

    Every frame is passed through ``recognize`` to get a segmentation map,
    and ``get_centre_of_shapes`` turns that map into a list of points.

    Args:
        filename: Path of the input video.
        output_video: Optional path. When truthy, an annotated copy of the
            video is written there (DIVX codec, same size and FPS as the
            input) with each point drawn and the segmentation map added to
            the green channel.

    Returns:
        dict mapping frame number (0-based) to the coordinates found in that
        frame. If the video yields fewer frames than its reported frame
        count, the dict only contains the frames that were actually read.
    """
    cap = cv2.VideoCapture(filename)  # input video reader
    out = None  # output writer, created only when requested
    coordinate_from_all_frames = {}

    try:
        if output_video:
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = int(cap.get(cv2.CAP_PROP_FPS))
            fourcc = cv2.VideoWriter_fourcc(*'DIVX')
            out = cv2.VideoWriter(output_video, fourcc, fps, (width, height))

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        print("Frames in video: %d" % (total_frames))
        printProgressBar(0, total_frames, prefix="Progress:",
                         suffix="Complete", length=50)

        for frame_number in range(total_frames):
            retval, frame = cap.read()
            if not retval:
                # The reported frame count comes from container metadata and
                # can exceed what is decodable; stop instead of indexing None.
                break

            # Only channel 0 is used as the single-channel model input
            # (blue, given OpenCV's default BGR channel order).
            gray_frame = frame[:, :, 0]
            segmentation_map = recognize(gray_frame)
            coordinates_from_one_frame = get_centre_of_shapes(segmentation_map)

            if out is not None:
                for point in coordinates_from_one_frame:
                    # (255, 0, 0) is blue in BGR; thickness -1 fills the dot.
                    cv2.circle(frame, (point[0], point[1]), 3, (255, 0, 0), -1)
                # NOTE(review): assumes segmentation_map is a 0/1 mask; the
                # addition may wrap around for uint8 frames — confirm.
                frame[:, :, 1] = frame[:, :, 1] + segmentation_map * 200
                out.write(frame)

            coordinate_from_all_frames[frame_number] = coordinates_from_one_frame
            printProgressBar(frame_number + 1, total_frames, prefix="Progress:",
                             suffix="Complete", length=50)
    finally:
        # Always release the native handles, even if inference raised.
        cap.release()
        if out is not None:
            out.release()

    return coordinate_from_all_frames
def main(books, path_download):
    """Download every book in ``books`` as a PDF into ``path_download``.

    Books whose target file already exists are not downloaded again, but
    they still advance the progress bar so it always finishes at 100%.

    Args:
        books: List of dicts describing the books. Each dict must provide
            the keys ``'title'`` and ``'url_book'``.
        path_download: Directory in which the PDF files are stored.
    """
    path_url = '/content/pdf/'

    url = urljoin(BASE_URL, path_url)
    total = len(books)

    print("\nExecutando Downloads dos Livros")
    printProgressBar(0, total, prefix='Progress:', suffix='Complete',
                     length=50)

    for i, book in enumerate(books):
        title = book['title']
        # Drops the first six characters of the stored path
        # (presumably a fixed prefix — confirm against the scraper).
        path_book = book['url_book'][6:]
        data = quote_plus(path_book) + '.pdf'
        url_full = urljoin(url, data)

        # '/' in a title would otherwise be read as a directory separator.
        file_name = (title + '.pdf').replace('/', '-')
        target = os.path.join(path_download, file_name)

        if not os.path.exists(target):
            urlretrieve(url_full, target)

        printProgressBar(i + 1,
                         total,
                         prefix='Progress:',
                         suffix='Complete',
                         length=50)
Example #7
0
def map_tsrs(tsr_filename):
    """Assign each TSR in the file to the annotated regions it overlaps.

    Each line of the TSR file is split on whitespace into six fields:
    chromosome, left, right, an ignored field, counts, and strand. The TSR
    is compared with the regions stored for its chromosome in the
    module-level ``regions_dict``. When the strands match and the intervals
    overlap, the split TSR line is appended to ``gene_tsr_dict[gene_name]``.
    TSRs on a chromosome absent from ``regions_dict`` are ignored.

    Args:
        tsr_filename: Path of the TSR file to read.
    """
    bar_style = dict(prefix='Progress:', suffix='Complete', length=50)

    with open(tsr_filename, 'r') as tsr_file:
        # First pass only counts lines so the bar knows its total.
        total_lines = sum(1 for _ in tsr_file)
        tsr_file.seek(0)
        print("Mapping TSRs.")
        printProgressBar(0, total_lines, **bar_style)

        last_index = total_lines - 1
        for line_idx, record in enumerate(tsr_file):
            (tsr_chromosome, tsr_left, tsr_right, _, tsr_counts,
             tsr_strand) = record.split()

            try:
                candidate_regions = regions_dict[tsr_chromosome]
            except KeyError:
                # Chromosome is not in the annotation: nothing to compare.
                candidate_regions = ()

            for chromosome, left, right, strand, gene_name in candidate_regions:
                overlaps = (tsr_strand == strand
                            and int(tsr_right) >= int(left)
                            and int(tsr_left) <= int(right))
                if overlaps:
                    gene_tsr_dict.setdefault(gene_name,
                                             []).append(record.split())

                # Stop once a region starts past the right edge of the TSR
                # (relies on the regions being ordered by their left edge).
                if int(left) > int(tsr_right):
                    break

            # Redraw only every 300 lines, plus once on the final line.
            if line_idx % 300 == 0 or line_idx == last_index:
                printProgressBar(line_idx + 1, total_lines, **bar_style)
Example #8
0
    def predict(self, X, conv_s=1, pool_f=2, pool_s=2, read_from_file=False):
        """Classify each image in ``X`` with one forward pass of the network.

        The forward pass is two convolutions (each followed by ReLU), one
        max-pool, a ReLU dense layer and a softmax output layer.

        Args:
            X: Iterable of images exposing ``shape[0]``; each image is
                reshaped to ``(1, 28, 28)``.
            conv_s: Stride passed to both ``convolution`` calls.
            pool_f: Window size passed to ``maxpool``.
            pool_s: Stride passed to ``maxpool``.
            read_from_file: When true, ``self.params`` is replaced by the
                object unpickled from ``self.save_path``.

        Returns:
            list with the arg-max class index of every image, in input order.
        """
        ret = []
        # Parameters are loaded at most once per call, on the first image,
        # rather than re-reading the file for every image.
        params_pending = read_from_file
        for i, image in enumerate(X):
            printProgressBar(self,
                             iteration=i + 1,
                             total=X.shape[0],
                             prefix='Progress:',
                             suffix='   |   Example #: ')
            if params_pending:
                # NOTE(security): pickle.load can execute arbitrary code;
                # only point save_path at files this program wrote itself.
                with open(self.save_path, 'rb') as params_file:
                    self.params = pickle.load(params_file)
                params_pending = False

            [f1, f2, w3, w4, b1, b2, b3, b4] = self.params

            image = image.reshape(1, 28, 28)

            # ---- forward pass ----
            conv1 = convolution(image, f1, b1, conv_s)
            conv1[conv1 <= 0] = 0  # ReLU

            conv2 = convolution(conv1, f2, b2, conv_s)
            conv2[conv2 <= 0] = 0  # ReLU

            pooled = maxpool(conv2, pool_f, pool_s)

            (nf2, dim2, _) = pooled.shape
            fc = pooled.reshape((nf2 * dim2 * dim2, 1))  # flatten to a column

            z = w3.dot(fc) + b3  # first dense layer
            z[z <= 0] = 0  # ReLU

            out = w4.dot(z) + b4  # second dense layer

            probs = softmax(out)  # class probabilities
            ret.append(np.argmax(probs))

        return ret
Example #9
0
    def fit(self, X, y, maxEpochs=500):
        """Train the network on random minibatches using ``findMin``.

        Sets ``self.layer_sizes``, ``self.classification`` and
        ``self.weights``.

        Args:
            X: 2-D array of training inputs, one example per row.
            y: Training targets; a 1-D array is promoted to a single column.
            maxEpochs: Upper bound on the epoch counter. With a value of 1
                or less no optimisation step runs and the random initial
                weights are stored.
        """
        n, _ = X.shape

        if y.ndim == 1:
            y = y[:, None]

        self.layer_sizes = ([X.shape[1]] + self.hidden_layer_sizes +
                            [y.shape[1]])
        # assume it's classification iff y has more than 1 column
        self.classification = y.shape[1] > 1

        # random init
        scale = 0.01
        weights = list()
        for i in range(len(self.layer_sizes) - 1):
            W = scale * np.random.randn(self.layer_sizes[i + 1],
                                        self.layer_sizes[i])
            b = scale * np.random.randn(1, self.layer_sizes[i + 1])
            weights.append((W, b))
        weights_flat = flatten_weights(weights)

        # NOTE(review): this runs maxEpochs - 1 epochs, so the bar stops one
        # step short of its total. Kept as-is; confirm whether
        # range(maxEpochs) was intended.
        for e in range(maxEpochs - 1):
            printProgressBar(self,
                             iteration=e + 1,
                             total=maxEpochs,
                             prefix='Progress:',
                             suffix='   |   Epoch #: ')
            # Draw 2000 row indices with replacement for this minibatch.
            random_indices = np.random.randint(n, size=2000)
            X_rand = X[random_indices]
            y_rand = y[random_indices]

            weights_flat, _ = findMin(self.funObj,
                                      weights_flat,
                                      self.max_iter,
                                      X_rand,
                                      y_rand,
                                      verbose=True,
                                      alpha=0.0001)

        # Use the running variable: it is always bound, even when the loop
        # body never executed.
        self.weights = unflatten_weights(weights_flat, self.layer_sizes)
Example #10
0
def train(data, cnp, epochs, num_test_maximum, alpha, optimizer):
    """Train ``cnp`` for ``epochs`` passes over ``data``.

    For every item a random permutation of its points is split into a
    context part and a test part, the model is evaluated on both, and one
    optimiser step is taken on the loss returned by ``lossf``.

    Args:
        data: Sized sequence of items where ``item[0]`` holds the x tensor
            and ``item[1]`` the y tensor of one function.
        cnp: Model to train. Must expose ``train()``, ``encoded_size`` and
            be callable as ``cnp(context_x, context_y, test_x, test_y)``.
        epochs: Number of passes over ``data``.
        num_test_maximum: The context size is drawn uniformly from
            ``[num_points - num_test_maximum, num_points)``.
        alpha: Weighting factor forwarded to ``lossf``.
        optimizer: Optimiser providing ``zero_grad()`` and ``step()``.
    """
    cnp.train()
    bar_length = 50  # same width for the initial and the updated bar
    for epoch in range(epochs):
        total_loss = 0.0
        iteration = 0
        length = len(data)
        epoch_prefix = 'Epoch {} Progress:'.format(epoch + 1)
        printProgressBar(0,
                         length,
                         prefix=epoch_prefix,
                         suffix='Complete',
                         length=bar_length)
        for i, function in enumerate(data):
            optimizer.zero_grad()
            num_points = function[0].size()[0]
            perm = torch.randperm(num_points)
            num_context = np.random.randint(num_points - num_test_maximum,
                                            num_points)
            context_x = function[0][perm][0:num_context]
            context_y = function[1][perm][0:num_context]
            test_x = function[0][perm][num_context:num_points]
            test_y = function[1][perm][num_context:num_points]
            mu, sigma, log_p, en_dist, t_en_dist, MSE = cnp(
                context_x, context_y, test_x, test_y)
            loss = lossf(
                log_p,
                torch.distributions.Normal(torch.zeros(cnp.encoded_size), 1),
                en_dist, t_en_dist, alpha, MSE)

            loss.backward()
            optimizer.step()

            # .item() detaches the value; summing the tensor itself would
            # keep every iteration's autograd graph alive for the epoch.
            total_loss += loss.item() / length

            iteration += 1

            printProgressBar(
                i + 1,
                length,
                prefix=epoch_prefix,
                suffix='Complete.  Iteration = {}. Average loss = {}.'.format(
                    iteration, total_loss),
                length=bar_length)
        print('EPOCH LOSS {}'.format(total_loss))
Example #11
0
    def predict(self, Xtest):
        """Predict a label for every row of ``Xtest`` by cosine-similarity kNN.

        Each test example is compared with every stored training example
        (``self.X`` / ``self.y``) via ``cosineSim``. The ``self.k`` most
        similar examples vote, and the mode of their labels is the result.

        Args:
            Xtest: Indexable, sized collection of test examples.

        Returns:
            NumPy array with one predicted label per test example.
        """
        n_queries = len(Xtest)
        # uninitialised buffer; every slot is filled in the loop below
        y_pred = np.empty(n_queries)

        for i in range(n_queries):
            printProgressBar(self,
                             iteration=i + 1,
                             total=n_queries,
                             prefix='Progress:',
                             suffix='   |   Example #: ')

            # [similarity, label] pair for every training example
            scored = [[cosineSim(Xtest[i], self.X[j]), self.y[j]]
                      for j in range(len(self.X))]

            # ascending by similarity, so the best matches sit at the end
            scored.sort(key=lambda pair: pair[0])
            k_nearest = scored[-self.k:]
            print(k_nearest)

            nearest_labels = [label for _, label in k_nearest]
            mode_label = stats.mode(nearest_labels)
            # first element of the mode result is the modal label
            y_pred[i] = mode_label[0]
        return y_pred
Example #12
0
    def fit(self, X, Y, maxEpochs=15000):
        """Fit ten one-vs-rest linear classifiers with hinge-style updates.

        A bias column of ones is prepended to ``X``. In every epoch 50 rows
        are drawn with replacement. For each drawn example and each of its
        target entries, the matching weight row is nudged whenever the
        margin ``yi * (x . w)`` is at most 1.

        Args:
            X: 2-D array of inputs, one example per row.
            Y: 2-D array of targets; an entry equal to 0 counts as the
                negative class (-1), anything else as the positive class (+1).
            maxEpochs: Number of sampling/update rounds.

        Returns:
            The weight matrix ``self.W`` of shape ``(10, n_features + 1)``.
        """
        # Add a bias to the X data matrix
        bias_column = np.ones((X.shape[0], 1))
        Z = np.concatenate((bias_column, X), axis=1)
        n, d = Z.shape

        lr = .001  # learning rate
        C = 0.4  # scales the size of every update

        self.W = np.zeros((10, d))

        for e in range(maxEpochs):
            printProgressBar(self,
                             iteration=e + 1,
                             total=maxEpochs,
                             prefix='Progress:',
                             suffix='   |   Epoch #: ')
            # 50 random row indices, drawn with replacement
            batch_idx = np.random.randint(n, size=50)

            for x, y in zip(Z[batch_idx], Y[batch_idx]):
                for digit, target in enumerate(y):
                    yi = -1 if target == 0 else 1
                    # Update only when the margin is violated.
                    if yi * np.dot(x, self.W[digit].T) <= 1:
                        self.W[digit] = np.add(self.W[digit],
                                               lr * C * yi * x)

        return self.W
Example #13
0
def iphostname(ip_list):  # pull hostnames for list of IPs
    """Scan every IP with nmap and record its hostname and OS guess.

    For each address an ``nmap -A -T5`` scan is run, and a row
    ``[ip, hostname, os_name]`` is added to the module-level
    ``ips_hostnames`` table. ``os_name`` is ``''`` when nmap reports no OS
    match. A failing scan is printed and skipped, but the progress bar
    still advances.

    Args:
        ip_list: Sized sequence of IP addresses (anything ``str()`` accepts).
    """
    total = len(ip_list)
    for i, ip in enumerate(ip_list):
        try:
            nmScan = nmap.PortScanner()
            results = nmScan.scan(str(ip), arguments='-A -T5')
            host_info = results['scan'][str(ip)]
            hostname = host_info['hostnames'][0]['name']

            # An empty or missing match list means "OS unknown", not a
            # failed scan, so the host still gets its row.
            os_matches = host_info.get('osmatch') or []
            OS = os_matches[0]['name'] if os_matches else None
            ips_hostnames.add_row([ip, hostname, '' if OS is None else OS])

            # `a` is defined elsewhere (presumably an alias that clears the
            # terminal — confirm).
            a('clear')
        except Exception as e:
            # Best effort: report the problem and move on to the next host.
            print(e)

        # Runs for failed scans too, so the bar always reaches the end.
        progress_bar.printProgressBar(
            i + 1,
            total,
            prefix='Scanning %d of %d...\nProgress:' % (i + 1, total),
            suffix='Complete',
            length=50)
Example #14
0
    def train_on_memory(self, batch_size, epochs):
        """
        Train the agent on everything stored in its experience replay memory.

        The memory is shuffled once, cut into batches once, and the same
        batches are then replayed in the same order for every epoch.

        :param batch_size: Batch size for training
        :param epochs: Number of times the mem should be fully browsed
        """
        print("Training on ", epochs, " epochs from the replay memory..")

        states, actions, next_states, rewards = self.mem.all()

        # One shared permutation keeps the four tensors row-aligned.
        order = torch.randperm(states.size()[0])
        states = states[order]
        actions = actions[order]
        rewards = rewards[order]
        next_states = next_states[order]

        # Same argument order as train_on_batch expects.
        batched = [
            torch.split(tensor, batch_size)
            for tensor in (states, actions, next_states, rewards)
        ]
        nb_batches = len(batched[0])

        for ep in range(epochs):
            epoch_label = "Epoch " + str(ep + 1) + "/" + str(epochs)
            for done, batch in enumerate(zip(*batched), start=1):
                self.train_on_batch(*batch)
                printProgressBar(done, nb_batches, epoch_label, length=90)
def synthesize_images(set_name, op_list):
    """Synthesize data from original images.

    Reads ``model_data/<set_name>_log.csv``. If any line already mentions
    one of the synthetic operations, the function returns immediately.
    Otherwise every data row is passed to ``process_image`` and the rows it
    returns are appended to the same CSV file.

    Args:
        set_name: Name of the data set; selects the image folders
            ``data_sets/<set_name>/left/`` and ``.../right/`` and the log.
        op_list: Sequence with at least three operations. They are applied
            individually and as the pairs (0, 2) and (1, 2).
    """
    op_todo = [[op_list[0]], [op_list[1]], [op_list[2]],
               [op_list[0], op_list[2]], [op_list[1], op_list[2]]]
    print(op_todo)

    img_path = [
        "data_sets/%s/left/" % (set_name),
        "data_sets/%s/right/" % (set_name)
    ]
    csv_file = "model_data/%s_log.csv" % (set_name)

    # A log that already names a synthetic operation has been processed.
    with open(csv_file, 'r') as in_csv:
        for line in in_csv:
            if re.search(r"(flip|autocont|equalize|darken|brighten)", line):
                printProgressBar(1, 1)
                return

    print("Processing images...")
    with open(csv_file, 'a+') as io_csv:
        # 'a+' is positioned at the end; rewind to read the existing rows.
        io_csv.seek(0)
        reader = csv.reader(io_csv, delimiter=',')
        next(reader, None)  # first row is discarded (presumably the header)
        entries = list(reader)
        cnt_total = len(entries)

        # One writer serves all entries; append mode writes at end of file.
        writer = csv.writer(io_csv, delimiter=',')
        printProgressBar(0, cnt_total)
        for cnt_iter, entry in enumerate(entries, 1):
            printProgressBar(cnt_iter, cnt_total)
            new_entries = process_image(img_path, [entry[0], entry[1]],
                                        int(entry[-1]), op_todo)
            writer.writerows(new_entries)
            # NOTE(review): purpose of this fixed pause is not visible here.
            time.sleep(0.1)
Example #16
0
    all_df = pd.merge(df, picks_df)

    season_totals = all_df.groupby(['season', 'player'])['points'].sum()
    return season_totals.groupby('season').std().mean()


# Brute-force search for the pick order with the lowest score from
# calculate_std. The first round is fixed; every ordering of the remaining
# picks is tried.
first = list(range(1, CONFIG.PLAYERS + 1))  # first round goes in order
# Player numbers 1..PLAYERS, repeated once per remaining round.
rest = (x + 1 for y in range(CONFIG.PICKS - 1) for x in range(CONFIG.PLAYERS))

# NOTE: permutations() treats equal values at different positions as
# distinct, so identical pick orders are evaluated more than once.
perms = itertools.permutations(rest)

best_combo = None
# Infinity instead of a magic number, so the first real result always wins.
best_result = float("inf")

total_runs = math.factorial(CONFIG.PLAYERS * (CONFIG.PICKS - 1))
print("There will be {0} total runs".format(total_runs))

run_count = 1

for rest in perms:
    picks = first + list(rest)
    result = calculate_std(picks)
    printProgressBar(run_count, total_runs)
    run_count += 1
    if result < best_result:
        best_combo = picks
        best_result = result

print("Best result was: {0}".format(best_result))
print("Best combo was: {0}".format(best_combo))
Example #17
0
import time
from get_dir_files import run_fast_scandir
from essentia_bpm import get_file_bpm
from progress_bar import printProgressBar

directory = r"/Volumes/Carl'S/Music"
subf, files = run_fast_scandir(directory, [".flac", ".mp3"])

files_length = len(files)
files_count = 0
print("Processing %d files" % files_length)

time1 = time.time()
printProgressBar(0,
                 files_length,
                 prefix='Progress:',
                 suffix='Complete',
                 length=50)
for file in files:
    files_count = files_count + 1
    try:
        song = mutagen.File(file, easy=True)
        if (not song.get("bpm", False)):
            song["bpm"] = str(int(get_file_bpm(file)))
            song.save()
        print("Processed:" + file, file=open("output.txt", "a"))
    except:
        print("Failed:" + file, file=open("output.txt", "a"))
    printProgressBar(files_count,
                     files_length,
                     prefix='Progress:',
Example #18
0
def build_dict(filename):
    """
    Count how often each token occurs in a CSV file of training tickets.

    Every CSV row is turned into its string representation, stripped of
    punctuation, escape sequences and digits, split on whitespace, passed
    through ``cj.check_for_cj_chars`` and lower-cased before counting.

    Parameters:
    filename    - Required : name of the csv-file with the training data (string)

    Returns:
    dict mapping each lower-cased token to its number of occurrences
    """
    dictionary = {}
    regex_noword = re.compile(r"\W",
                              re.IGNORECASE)  # everything that is not a word
    regex_digit = re.compile(r"\d", re.IGNORECASE)  # all numbers

    # Rows are str()-ed below, so control characters show up as literal
    # backslash escapes. Order matters: the lone backslash must go last,
    # or it would break up the longer escape sequences before they match.
    unwanted_sequences = (
        '\\ufeff',  # byte order mark
        ';',
        ',',
        '_',
        '\\n',
        '\\xa0',
        '\\t',
        '\\r',
        '\\',
    )

    # Read everything up front; the file is closed even if parsing fails.
    with open(filename, 'r', encoding='utf-8', errors='ignore') as csv_file:
        tickets = list(csv.reader(csv_file, delimiter=',', quotechar='|'))

    # progress
    l = len(tickets)
    progress_bar.printProgressBar(0,
                                  l,
                                  prefix='Progress:',
                                  suffix='Complete',
                                  length=50)
    for i, ticket in enumerate(tickets):
        progress_bar.printProgressBar(iteration=i + 1,
                                      total=l,
                                      prefix='Progress:',
                                      suffix='Complete',
                                      length=50)

        # deleting all the unwanted characters
        line = str(ticket)
        for sequence in unwanted_sequences:
            line = line.replace(sequence, ' ')
        line = re.sub(regex_noword, ' ', line)
        line = re.sub(regex_digit, ' ', line)

        # split() without arguments never yields empty strings
        tokens = line.split()
        # check for chinese/japanese tokens
        tokens = cj.check_for_cj_chars(tokens)

        for token in tokens:
            token = token.lower()
            dictionary[token] = dictionary.get(token, 0) + 1

    return dictionary
Example #19
0
def merge_files(path_to_folder_with_files, merged_file_full_name,
                source_encoding='utf-8'):
    """Concatenate all tab-separated ``.csv`` files of a folder into one file.

    The merged file is written as UTF-8 and starts with a fixed header row.
    The first row of every source file is skipped and the remaining rows
    are copied unchanged. The sources are read twice: once to count the
    rows for the progress bar, once to copy them.

    Args:
        path_to_folder_with_files: Folder whose ``*.csv`` files are merged
            (sub-folders are not searched).
        merged_file_full_name: Path of the merged output file; overwritten.
            NOTE: placing it inside the source folder with a ``.csv``
            extension makes it one of its own inputs.
        source_encoding: Encoding used for BOTH passes over the source
            files. Defaults to the encoding the copy pass has always used.
    """
    with open(merged_file_full_name, 'w', encoding='utf-8',
              newline="") as merged_file:
        writer = csv.writer(merged_file, delimiter='\t')
        writer.writerow([
            "ANO_BO", "NUM_BO", "BO_INICIADO", "BO_EMITIDO", "DATAOCORRENCIA",
            "PERIDOOCORRENCIA", "FLAGRANTE", "LATITUDE", "LONGITUDE",
            "DESCRICAOLOCAL", "DELEGACIA_NOME", "DELEGACIA_CIRCUNSCRICAO",
            "RUBRICA", "VITIMAFATAL", "SEXO", "IDADE", "CORCUTIS",
            "NATUREZAVINCULADA"
        ])

        files_to_be_merged = [
            join(path_to_folder_with_files, f)
            for f in listdir(path_to_folder_with_files)
            if isfile(join(path_to_folder_with_files, f))
            and f.split('.')[-1] == 'csv'
        ]
        number_of_files_to_be_merged_str = str(len(files_to_be_merged))

        print("Begining to merge " + number_of_files_to_be_merged_str +
              " files")

        # Pass 1: count data rows so the progress bar knows its total.
        total_row_count = 0
        for current_file_index, file_name in enumerate(files_to_be_merged, 1):
            print("File " + str(current_file_index) + '/' +
                  number_of_files_to_be_merged_str + ':')
            print(file_name)

            print("Counting rows...")
            with open(file_name, 'r',
                      encoding=source_encoding) as source_file:
                reader = csv.reader(source_file, delimiter='\t')
                next(reader, None)  # skip header; tolerate an empty file
                row_count = sum(1 for _ in reader)
            print(str(row_count) + " rows")
            total_row_count += row_count

        # Pass 2: copy the data rows into the merged file.
        current_row_index = 0
        for current_file_index, file_name in enumerate(files_to_be_merged, 1):
            print("File " + str(current_file_index) + '/' +
                  number_of_files_to_be_merged_str + ':')
            print(file_name)

            with open(file_name, 'r',
                      encoding=source_encoding) as source_file:
                reader = csv.reader(source_file, delimiter='\t')
                next(reader, None)  # skip header; tolerate an empty file

                for row in reader:
                    writer.writerow(row)
                    current_row_index += 1
                    # Redraw every 100 rows and once on the very last row.
                    if (current_row_index % 100 == 0
                            or current_row_index == total_row_count):
                        printProgressBar(current_row_index,
                                         total_row_count,
                                         suffix='Completed',
                                         decimals=2)

            print()
Example #20
0
def trainABit(execution_plan, previous_losses = []):
  """Run a bounded burst of training steps inside a fresh TensorFlow session.

  Builds the model with ``create_model``, obtains the dev/train sets from
  ``execution_plan.getData`` and then runs training steps, printing
  statistics and evaluating every ``FLAGS.steps_per_checkpoint`` steps.

  Args:
    execution_plan: object whose ``getData(global_step)`` returns
      ``(dev_set, train_set)``; both are indexed by bucket id below.
    previous_losses: list of earlier checkpoint losses. It is appended to in
      place. NOTE(review): the default is a mutable list, so calls that omit
      this argument share one history between invocations -- confirm that
      this is intended before changing it.

  Returns:
    Nothing; no return statement is present in this block.
  """
  with tf.Session() as sess:
    # Create model.
    print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
    model = create_model(sess, False)
    dev_set, train_set = execution_plan.getData(model.global_step.eval())
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
    print("Train bucket sizes", train_bucket_sizes)
    train_total_size = float(sum(train_bucket_sizes))

    # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
    # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
    # the size if i-th training bucket, as used later.
    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in xrange(len(train_bucket_sizes))]

    # This is the training loop.
    # step_time and loss accumulate per-step averages over one checkpoint
    # interval and are reset after each report.
    step_time, loss = 0.0, 0.0
    current_step = 0
    # NOTE(review): with `<=` the body runs 10 * steps_per_checkpoint + 1
    # times (current_step goes from 0 through 10 * steps_per_checkpoint).
    while current_step <= 10 * FLAGS.steps_per_checkpoint:
      progress_bar.printProgressBar(current_step, 10 * FLAGS.steps_per_checkpoint)
      # Choose a bucket according to data distribution. We pick a random number
      # in [0, 1] and use the corresponding interval in train_buckets_scale.
      random_number_01 = np.random.random_sample()
      # Smallest bucket index whose cumulative scale exceeds the random draw.
      bucket_id = min([i for i in xrange(len(train_buckets_scale))
                       if train_buckets_scale[i] > random_number_01])

      # Get a batch and make a step.
      start_time = time.time()
      
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
         train_set, bucket_id)
      _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                   target_weights, bucket_id, False)
      # Dividing each contribution by steps_per_checkpoint turns the running
      # sums into means over one checkpoint interval.
      step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
      loss += step_loss / FLAGS.steps_per_checkpoint
      current_step += 1


      # Once in a while, we save checkpoint, print statistics, and run evals.
      if current_step % FLAGS.steps_per_checkpoint == 0:
        # Print statistics for the previous epoch.
        # Losses of 300 or more are reported as infinite perplexity instead
        # of being passed to math.exp.
        perplexity = math.exp(float(loss)) if loss < 300 else float("inf")
        print("global step %d learning rate %.4f step-time %.2f perplexity "
               "%.2f loss %.2f " % (model.global_step.eval(), model.learning_rate.eval(),
                         step_time, perplexity, loss))
        # Decrease learning rate if no improvement was seen over last 3 times.
        if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
          sess.run(model.learning_rate_decay_op)
        previous_losses.append(loss)
        # Save checkpoint and zero timer and loss.
        checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
        # The checkpoint is only written on every fifth reporting interval.
        if current_step % (5 * FLAGS.steps_per_checkpoint) == 0:
            model.saver.save(sess, checkpoint_path, global_step=model.global_step)
        step_time, loss = 0.0, 0.0
        # Run evals on development set and print their perplexity.
        # Note: this loop rebinds bucket_id; the training loop picks a new
        # value at the start of its next iteration.
        for bucket_id in xrange(len(_buckets)):
          if len(dev_set[bucket_id]) == 0:
            print("  eval: empty bucket %d" % (bucket_id))
            continue
          encoder_inputs, decoder_inputs, target_weights = model.get_batch(dev_set, bucket_id)

          # Same model.step call as in training but with the final flag set
          # to True (presumably forward-only evaluation -- verify against
          # the model's step() definition).
          _, eval_loss, outputs = model.step(sess, encoder_inputs, decoder_inputs,
                                             target_weights, bucket_id, True)
          eval_ppx = math.exp(float(eval_loss)) if eval_loss < 300 else float(
              "inf")
          print("  eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx))

        sys.stdout.flush()
        essay_score_graduated, interview_score_dont_graduated,
        essay_score_dont_graduated, titles, fig_title, xlabel, ylabel,
        savename)

    titles = [
        'Results of students which graduated',
        "Results of students which didn't graduated", '', ""
    ]
    fig_title = "Exams and scores result in " + str(years[i])
    xlabel = ["GPA", "GPA", "LANGUAGE EXAM", "LANGUAGE EXAM"]
    ylabel = [
        "TOTAL SCORE", "TOTAL SCORE", "SOCIAL ACTIVITY", "SOCIAL ACTIVITY"
    ]
    savename = "graduated_score_graph_" + str(years[i]) + "_part2"
    graduated_scores_graph.draw_graph(gpa_graduated, score_graduated,
                                      gpa_dont_graduated, score_dont_graduated,
                                      language_exam_graduated,
                                      social_activity_graduated,
                                      language_exam_dont_graduated,
                                      social_activity_dont_graduated, titles,
                                      fig_title, xlabel, ylabel, savename)
    printProgressBar(progress_bar_count,
                     len(years),
                     prefix='Getting data:',
                     suffix='Complete',
                     length=50)
    progress_bar_count = progress_bar_count + 1

# database disconnect
database_disconnect.database_disconnect(db_connection, db_connection.cursor())
Example #22
0
        try:
            nmScan = nmap.PortScanner()
            nmScan.scan(str(ip), arguments='-n -sP -PE -T4')
            state = nmScan[str(ip)].state()
            if state == 'up':
                ip_list.append(str(ip))
            else:
                pass
        except Exception, e:
            continue

    print "%d Hosts are up." % len(ip_list)

    progress_bar.printProgressBar(0,
                                  len(ip_list),
                                  prefix='Scanning %d of %d...\nProgress:' %
                                  (1, len(ip_list)),
                                  suffix='Complete',
                                  length=50)  # initialize progress bar
    iphostname(ip_list)


def iphostname(ip_list):  # pull hostnames for list of IPs

    r = 1
    for i, ip in enumerate(ip_list):
        try:
            nmScan = nmap.PortScanner()
            results = nmScan.scan(str(ip), arguments='-A -T5')
            hostname = results['scan'][str(ip)]['hostnames'][0]['name']
            OS = results['scan'][str(ip)]['osmatch'][0]['name']
            if OS == None:
Example #23
0
    if print_progress_bar:
        # Initial call to print 0% progress
        if progress_bar_varable < 6:
            print_data_name = gamepad_axis_dict[progress_bar_varable]
        elif progress_bar_varable > 5:
            print_data_name = gamepad_button_dict[progress_bar_varable - 6]
        elif progress_bar_varable > 17:
            print(
                "ERROR: Attempting to access value outside of preassigned range."
            )
            print("ERROR: Exiting...")
            #return

        printProgressBar(0,
                         max_time,
                         prefix=print_data_name,
                         suffix='',
                         length=50)

    start = time.time()
    while not done:
        pygame.event.get()

        ##for event in pygame.event.get(): # User did something.
        ##  if event.type == pygame.JOYBUTTONDOWN:
        ##    #print("Joystick button pressed.")
        ##  #print("")
        ##elif event.type == pygame.JOYBUTTONUP:
        #print("Joystick button released.")
        #print("")
Example #24
0
def main():
    """Study how box-blur radius affects the contrast of one image.

    The image path is taken from ``sys.argv[1]`` and the blur level is read
    from standard input. Contrast is sampled over a range of blur radii, a
    polynomial is fitted to the samples with ``polyfit``, ``gold`` is run on
    the negated polynomial, and comparison plots are written to
    ``results.png`` (one for the requested blur level, then one per value
    returned by the search).

    Exits with status 1 when the script is not given exactly one argument.
    """
    x = []
    y = []

    # Choose the picture.
    if len(sys.argv) != 2:
        print(
            "Wrong number of arguments taken from console! (expected 1 argument but recieved:",
            len(sys.argv) - 1, ")")
        sys.exit(1)

    photo_name = sys.argv[1]

    # Identify the picture's extension. Keep asking until a supported one is
    # given; a single check would let a second bad answer slip through.
    extension = photo_name.split('.')[-1]
    while extension not in ('png', 'jpg'):
        print("File must be .png or .jpg. Choose another file:")
        photo_name = input()
        extension = photo_name.split('.')[-1]

    # Scratch file used to hand blurred images from Pillow to OpenCV. The
    # same path is used for writing and reading so that it also works on
    # platforms where a backslash is not a path separator.
    scratch_path = "auxiliar." + extension

    # Read and display the original image.
    OrgImage = Image.open(photo_name)
    OrgImage.show()
    image = cv2.imread(photo_name)
    img = rgb2gray(image)

    print("Blur level ( in range (1, 100) ):")
    nivel_blur = float(input())

    while nivel_blur < 1:
        print("Blur must be over 1 :")
        nivel_blur = float(input())

    def blurred_bgr(radius):
        """Box-blur the original by *radius* and reload the result via OpenCV."""
        OrgImage.filter(ImageFilter.BoxBlur(radius)).save(scratch_path)
        return cv2.imread(scratch_path)

    def negate_in_place(values):
        """Flip the sign of every entry of *values* without rebinding it."""
        for index in range(len(values)):
            values[index] = (-1) * values[index]

    # Number of even radii used for the plotted curve (and for sampling when
    # the blur level is 5 or more).
    even_steps = math.ceil((nivel_blur + 10) / 2)

    # Training: small blur levels sample every radius 0..19, larger ones
    # sample even radii up to roughly nivel_blur + 10.
    if nivel_blur < 5:
        radii = list(range(20))
    else:
        radii = [2 * i for i in range(even_steps)]

    total = len(radii)
    printProgressBar(0,
                     total,
                     prefix='Progress:',
                     suffix='Complete',
                     length=50)
    for done, radius in enumerate(radii, start=1):
        x.append(radius)
        y.append(get_contrast(rgb2gray(blurred_bgr(radius))))
        printProgressBar(done,
                         total,
                         prefix='Progress:',
                         suffix='Complete',
                         length=50)

    # Determine the polynomial that approximates the contrast function.
    coef = polyfit(x, y)
    x_contrast = [2 * i for i in range(even_steps)]
    y_contrast = [evalpoly(coef, point) for point in x_contrast]

    # Search on the negated polynomial; the sign is restored afterwards.
    negate_in_place(coef)

    x_min, y_min, val_min = gold(0, 20, coef, 2)

    if nivel_blur > 20:
        # Continue the search over further 20-wide windows.
        k = math.ceil((nivel_blur / 20) - 1)
        for i in range(1, k):
            x_aux, y_aux, val_aux = gold(20 * i, 20 * (i + 1), coef, 1)
            if x_min < x_aux:
                val_min = [val_aux[0]] + val_min
            else:
                val_min = [val_min[0]] + val_aux
            if y_min > y_aux:
                x_min = x_aux
                y_min = y_aux
                val_min = val_aux

    y_min = (-1) * y_min
    negate_in_place(coef)

    def save_report(radius):
        """Plot the contrast curve with *radius* marked beside the blurred image."""
        plt.figure()
        plt.subplot(121)
        plt.plot(x_contrast, y_contrast)
        plt.plot(radius, evalpoly(coef, radius), 'r*')
        plt.grid(True)
        plt.title('Grafic')
        blur = blurred_bgr(radius)
        plt.subplot(122)
        plt.imshow(cv2.cvtColor(blur, cv2.COLOR_BGR2RGB))
        plt.title('Imagine')
        plt.xticks([])
        plt.yticks([])
        plt.savefig('results.png')

    # Show the results: first the requested blur level, then each value
    # returned by the search, pausing for the user between plots.
    save_report(nivel_blur)
    print('Open results.png and press enter to continue')
    input()

    for radius in val_min:
        save_report(radius)
        input()
Example #25
0
else:
    print('Parameters: N_dim N_data N_times')
    sys.exit()


def alpha(theta, phi):
    """Return ``sin(theta) * exp(1j * phi) / 2`` computed with NumPy."""
    magnitude = np.sin(theta)
    phase = np.exp(1j * phi)
    return magnitude * phase / 2

# %%

# Initial value only: zero_state is reassigned inside the loop below before
# it is read anywhere in this section.
zero_state = np.matrix([1, 0], dtype=np.complex128)
# N_times evenly spaced values from 1e-7 to 10.
times = np.linspace(1e-7, 10, N_times)

# %%
# E[i, p] holds the minimised objective for time index i and sample p.
E = np.zeros((N_times, N_data))
# Random angles, one row per sample and one column per dimension:
# theta drawn from [0, pi), phi from [0, 2*pi).
theta_d = np.random.uniform(0, np.pi, (N_data, N_dim))
phi_d = np.random.uniform(0, 2 * np.pi, (N_data, N_dim))
# %%
for i, dt in enumerate(times):
    for p in range(N_data):
        # Only the first dimension's angles are used to build the state.
        alp = alpha(theta_d[p, 0], phi_d[p, 0])
        # Row vector [conj(alp)/|alp|, 1] / sqrt(2).
        zero_state = np.matrix([np.conj(alp)/np.abs(alp), 1]) / np.sqrt(2)
        # Conjugate-transpose times the row vector gives a 2x2 matrix.
        rho_0 = zero_state.H @ zero_state
        # Minimise `wrapper` (defined elsewhere) from an initial guess of
        # 2 * N_dim entries, each equal to 1e-3 * pi.
        res = minimize(wrapper, np.full(2 * N_dim, 1e-3 * np.pi), args=(theta_d[p], phi_d[p], dt, rho_0, N_dim))
        E[i, p] = res.fun
        if p == 0:
            # Print the state's norm once per time value.
            print(np.linalg.norm(zero_state))
        # NOTE(review): p is 0-based, so the value passed never reaches
        # N_data -- confirm whether p + 1 was intended.
        printProgressBar(p, N_data, suffix='Time {0}'.format(dt))

# Save the full result array under a name that includes N_dim.
np.save('eigen_work_N_{0}_norm'.format(N_dim), E)
Example #26
0
def test(agent: QNetwork,
         movements=100,
         nb_episodes=1000,
         step=0.01,
         show_plots=True):
    """
    Trains the QNetwork on episodes that start at random coordinates in
    [0, 1]^2; the stated goal of the task is to reach the position (0.5, 0.5).
    :param agent: QNetwork to be tested. Needs to have state_dim == 2 and 5 possible actions.
    :param movements: Number of moves the agent is allowed per episode
    :param nb_episodes: Number of episodes on which the agent trains
    :param step: Distance travelled at each move
    :param show_plots: if True, a figure of agent trajectories is displayed
    :return: The agent's loss memory
    """
    # Each state is an (x, y) pair and everything is computed on the CPU.
    state_dim = 2
    device = torch.device("cpu")

    def transition(s, a):
        # Environment dynamics with this run's step size and device bound in.
        return next_state(s, a, step, device)

    for episode_number in range(1, nb_episodes + 1):
        # Buffers for one episode; states has one extra row for the state
        # reached after the last move.
        states = torch.empty((movements + 1, state_dim), device=device)
        actions = torch.empty(movements, dtype=torch.int32, device=device)

        # Random starting position.
        states[0] = torch.rand(2)

        for t in range(movements):
            current = states[t]
            # Ask the agent for an action, then advance the environment.
            actions[t] = agent.decide(current.view(1, -1)).item()
            states[t + 1] = next_state(current, actions[t], step, device)

        # Rewards are computed for every state except the final one.
        rewards = get_rewards(states[:-1], actions, step, device)

        # Store the episode, then train on it.
        agent.memorize_exploration(states,
                                   actions,
                                   rewards,
                                   last_state_is_final=False)
        agent.update()

        printProgressBar(episode_number,
                         nb_episodes,
                         "Episodes completed: ",
                         length=90)

    if not show_plots:
        return agent.loss_mem

    # Plot trajectories from 50 random starting points.
    plt.figure("Training summary")
    plt.subplot(111)
    plt.title("Agent Trajectories")
    agent.plot_trajectory(torch.rand((50, 2)), transition)
    plt.show()
    return agent.loss_mem

    return 0
def get_books_describe(base_url, qtd_pages=20):
    """Search every result page for books and return their descriptions.

    Args:
        base_url: Site root; the path ``search/page/<n>`` is resolved
            against it for each page number.
        qtd_pages: Number of result pages to walk, starting at page 1.
            Defaults to 20.

    Returns:
        A list of dicts with the keys ``title``, ``url_book``, ``subtitle``,
        ``autor`` and ``year``, in page order. Pages that answer with an HTTP
        error are reported on stdout and skipped.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'
    }

    BASE_URL = urllib.parse.urljoin(base_url, 'search/page/')

    # Query string shared by every page request.
    values = {
        'facet-language': 'En',
        'facet-content-type': 'Book',
        'package': 'mat-covid19_textbooks',
        'sortOrder': 'newestFirst',
        'showAll': 'false'
    }
    data = urllib.parse.urlencode(values)

    # One URL per page number: <base>/search/page/<n>?<query>
    url_list = [
        BASE_URL + str(page_number) + '?' + data
        for page_number in range(1, qtd_pages + 1)
    ]

    books_list = []
    if not url_list:
        # Nothing to fetch; also avoids drawing a progress bar with total 0.
        return books_list

    total = len(url_list)

    print("Fazendo Busca de todos os livros")
    # Progress bar on the prompt; same prefix as the per-page updates below.
    printProgressBar(0, total, prefix='Progress:', suffix='Complete', length=50)

    for i, url in enumerate(url_list):
        req = Request(url=url, headers=headers)

        page_html = None
        try:
            uclient = uReq(req)
            try:
                page_html = uclient.read()
            finally:
                # Release the connection even when read() fails.
                uclient.close()
        except HTTPError as e:
            print(e.reason, " ", e.code)

        if page_html is not None:
            page_soup = soup(page_html, "html.parser")

            # Find the list of books on the page; a page without one
            # contributes no books.
            content_list = page_soup.find("ol", class_="content-item-list")
            books = content_list.findAll('li') if content_list is not None else []

            for book in books:
                divs = book.findAll("div")
                details = divs[1]
                paragraphs = details.findAll("p")
                spans = paragraphs[1].findAll("span")

                books_list.append({
                    'title': details.find("h2").text.strip(),
                    'url_book': details.find("a").get('href'),
                    'subtitle': paragraphs[0].text.strip(),
                    'autor': spans[0].text.strip(),
                    'year': spans[1].text.strip()
                })

        printProgressBar(i + 1, total, prefix='Progress:', suffix='Complete', length=50)

    return books_list
		if '&' in meta:  # for extra commands tacked on
			meta = meta.split('&')[0]
		title = meta.replace('+', ' ') + '.csv'
	else:  # improper usage
		print('The entered youtube link is incompatible with the program')
		print('youtube link styles that work:')
		print('\thttps://www.youtube.com/')
		print('\thttps://www.youtube.com/results?search_query=valuetainment')
		print('\thttps://www.youtube.com/watch?v=x9dgZQsjR6s')
		print('\thttps://www.youtube.com/user/patrickbetdavid')
		print('\thttps://www.youtube.com/playlist?list=PLFa0bDwXvBlDGFtce9u__1sBj6fgi21BE')
		driver.close()
		sys.exit(-1)
	# scroll to the bottom of the youtube page
	for i in range(scroll_amnt):
		printProgressBar(i + 1, scroll_amnt, 'Navigating Youtube:', length=50)
		driver.find_element_by_tag_name('body').send_keys(Keys.END)
		sleep(3)

	html = driver.page_source
	soup = BeautifulSoup(html, 'html.parser')
	master_list = []
	if 'list=' not in yt_link and not single_video:  # channel, homepage, and search
		videos = soup.find_all('div', {'id': 'dismissable'})
		for video in videos:
			if 'ytd-shelf-renderer' in video['class'] or 'ytd-compact-promoted-item-renderer' in video['class'] or 'ytd-rich-shelf-renderer' in video['class']:
				continue  # remove the outer most nested 'video'
			data_dict = {}
			# get title
			data_dict['title'] = video.find('a', {'id': title_id}).text.replace('\n', '')
			# get video url + use id for youtube API
def _row_leading_fields(row):
    """Return the output columns built from row[0], row[1], row[3..6] and row[10]."""
    return [
        row[0], row[1],
        convert_date(row[3]),
        convert_date(row[4]),
        convert_date(row[5]),
        convert_period(row[6]),
        convert_yes_no(row[10]),
    ]


def _row_trailing_fields(row):
    """Return the output columns built from row[19] onwards, blanking empty values."""
    return [
        row[19], row[22], row[23], row[25],
        (convert_yes_no(row[30]) if row[30] != '' else ''),
        (row[35][0] if row[35] != '' else ''),
        (row[37] if row[37] not in ('', '0') else ''),
        (convert_skin_color(row[41]) if row[41] != '' else ''),
        row[42],
    ]


def process_file(reader,
                 writer,
                 writer_for_rows_without_location_with_address,
                 current_row_index=0,
                 total_row_count=1000):
    """Filter and convert rows from *reader* into the two output writers.

    Rows missing the period (row[6]), year (row[0]), BO number (row[1]) or
    police station name (row[22]) are counted and dropped. Remaining rows
    with both coordinates (row[17], row[18]) are written to *writer*, with
    decimal commas in the coordinates replaced by dots. Rows without
    coordinates but with address columns 12, 13 and 15 filled are written to
    *writer_for_rows_without_location_with_address*; rows with neither are
    counted and dropped.

    Args:
        reader: iterable of rows (indexable sequences of column values).
        writer: object with ``writerow`` receiving complete rows.
        writer_for_rows_without_location_with_address: object with
            ``writerow`` receiving rows that have an address but no
            coordinates.
        current_row_index: number of rows already processed, used as the
            starting point for the progress bar.
        total_row_count: total passed to the progress bar.

    Returns:
        A list of nine integers, in this order: coordinates found by
        geocoding (always 0, geocoding is not performed), rows written to the
        address-only writer, geocoding errors (always 0), rows without
        coordinates and address, rows without period, rows without year,
        rows without BO number, rows without police station name, and the
        updated ``current_row_index``.

    Raises:
        TypeError: if a coordinate value is neither ``str`` nor ``float``.
    """
    # Kept only so the returned list keeps its shape; nothing increments them.
    count_rows_without_coordinates_location_found = 0
    count_rows_without_coordinates_error_when_geocoding = 0

    count_rows_without_coordinates_location_not_found = 0
    count_rows_without_coordinates_and_address = 0
    count_rows_without_period = 0
    count_rows_without_year = 0
    count_rows_without_bo_number = 0
    count_rows_without_police_station_name = 0

    for row in reader:
        current_row_index += 1

        printProgressBar(current_row_index,
                         total_row_count,
                         suffix="Complete",
                         decimals=3)

        # Every missing mandatory field is counted, so one row may
        # contribute to several counters before it is skipped.
        row_is_incomplete = False

        # row[6] -> PERIODOOCORRENCIA
        if row[6] == "":
            count_rows_without_period += 1
            row_is_incomplete = True

        # row[0] -> ANO_BO
        # row[1] -> NUM_BO
        # row[22] -> DELEGACIA_NOME
        if row[0] == "":
            count_rows_without_year += 1
            row_is_incomplete = True
        if row[1] == "":
            count_rows_without_bo_number += 1
            row_is_incomplete = True
        if row[22] == "":
            count_rows_without_police_station_name += 1
            row_is_incomplete = True

        if row_is_incomplete:
            continue

        # row[17] -> LATITUDE
        # row[18] -> LONGITUDE
        latitude, longitude = row[17], row[18]
        if latitude == "" or longitude == "":
            if row[12] != "" and row[13] != "" and row[15] != "":
                # Address is present: route the row to the address-only
                # output, keeping the raw address and coordinate columns.
                writer_for_rows_without_location_with_address.writerow(
                    _row_leading_fields(row)
                    + [row[12], row[13], row[14], row[15], row[17], row[18]]
                    + _row_trailing_fields(row))
                count_rows_without_coordinates_location_not_found += 1
            else:
                count_rows_without_coordinates_and_address += 1
            continue

        if (not isinstance(latitude, (str, float))
                or not isinstance(longitude, (str, float))):
            raise TypeError(
                "row %d: coordinates must be str or float, got %s and %s" %
                (current_row_index, type(latitude).__name__,
                 type(longitude).__name__))

        # Textual coordinates use a decimal comma; normalise it to a dot.
        if isinstance(latitude, str):
            latitude = latitude.replace(',', '.')
        if isinstance(longitude, str):
            longitude = longitude.replace(',', '.')

        writer.writerow(_row_leading_fields(row)
                        + [latitude, longitude]
                        + _row_trailing_fields(row))

    return [
        count_rows_without_coordinates_location_found,
        count_rows_without_coordinates_location_not_found,
        count_rows_without_coordinates_error_when_geocoding,
        count_rows_without_coordinates_and_address, count_rows_without_period,
        count_rows_without_year, count_rows_without_bo_number,
        count_rows_without_police_station_name, current_row_index
    ]
Example #30
0
# building_positive dictionary
print('building positive dictionary...')
print('')
positive_dict = build_dict(positive_path)
print('')

# calculate probabilities
print("calulating probabilities...")
print("iterating through negative_tokens...")
#progress
l = len(negative_dict)
i = 0
progress_bar.printProgressBar(0,
                              l,
                              prefix='Progress:',
                              suffix='Complete',
                              length=50)
for i, k in enumerate(negative_dict):
    progress_bar.printProgressBar(iteration=i + 1,
                                  total=l,
                                  prefix='Progress:',
                                  suffix='Complete',
                                  length=50)

    num_in_negatives = negative_dict[k]
    if k in positive_dict:
        num_in_positives = positive_dict[k]
    else:
        num_in_positives = 0