Example #1
0
def tag_images():
    """
    Serve one image at a time for manual tagging.

    Reads all rows from the read CSV file into a list and displays one image
    url/name to the user at a time, tracked by the module-global LIST_INDEX
    counter. Each submitted form value is appended to a copy of the current
    row and the row is collected into CSV_WRITE_DATA. When the end of the
    list is reached, the counter is reset, the collected rows are written to
    the write CSV file, and the "done" template is rendered.

    :return: rendered form with list(image_url, image_name) as context
    """
    global LIST_INDEX

    # create a sample read_csv_file using the image_list_data at the top
    # list_to_csv(csv_read_file, sample_data_list)

    # read the read_csv_file into a list (re-read on every request)
    IMAGE_DATA_LIST = csv_to_list(csv_read_file)

    # row currently being tagged; LIST_INDEX persists across requests
    image_data = IMAGE_DATA_LIST[LIST_INDEX]

    # copy the row so the list read from CSV is not mutated in place
    new_image_data = image_data[:]
    for value in request.form.values():
        if value is not None:
            new_image_data.append(value)

        # NOTE(review): the four statements below sit INSIDE the for-loop, so
        # a form with several values appends the (still growing) row once per
        # value and advances LIST_INDEX once per value. Confirm this is
        # intended and not a mis-indent — dedenting them would record exactly
        # one row per submission, which is what the docstring describes.
        CSV_WRITE_DATA.append(new_image_data)
        LIST_INDEX += 1
        if LIST_INDEX == len(IMAGE_DATA_LIST):
            # end of the list: reset the counter, flush everything collected
            # so far to the write CSV, and show the done page
            LIST_INDEX = 0
            list_to_csv(csv_write_file, CSV_WRITE_DATA)
            return render_template('done.html')
        # advance so the template below shows the next image
        image_data = IMAGE_DATA_LIST[LIST_INDEX]

    return render_template('show_image.html', image_data=image_data)
Example #2
0
def main():
    raw_list = csv_to_list(csv_file)[:100]
    total_len = len(raw_list)
    counter = 0
    result_dict = dict()
    print "Commencing Web Scraping..."
    start_time = time.time()
    for raw_link in raw_list:
        try:
            raw_link = raw_link[0]
            whois_link = "http://www.whois.com/whois/" + raw_link
            ipaddress_link = "http://" + raw_link + ".ipaddress.com/"
            whois_soup = link_to_lxmlsoup(whois_link)
            ipaddress_soup = link_to_lxmlsoup(ipaddress_link)
            result_dict.setdefault('Raw Link', []).append(str(raw_link))
            result_dict = whois_parser(whois_soup, result_dict)
            result_dict = ipaddress_parser(ipaddress_soup, result_dict)
            counter, total_len = print_counter(counter, total_len)
            if counter % 400 == 0:
                print "Commencing 30 Second Sleep after 400 iterations"
                time.sleep(30)
            time_elapsed = time.time() - start_time
            print_progress(time_elapsed, counter, total_len)
        except:
            dict_to_json(result_dict, 'output.json')
            dict_to_csv(result_dict, 'output.csv')
            print "Unexpected Error", sys.exc_info()[0]
            raise
    dict_to_json(result_dict, 'output.json')
    dict_to_csv(result_dict, 'output.csv')
Example #3
0
 def clean(self, value):
     """Parse *value* as a comma separated list, then delegate to the
     parent clean(); raise a ValidationError if parsing fails.
     """
     try:
         parsed = csv_to_list(value)
     except csv.csv.Error:
         # presumably csv here is a project wrapper around stdlib csv —
         # the error text is what the end user sees on a bad list
         message = _(u"Please provide a comma separated value list.")
         raise forms.ValidationError(message)
     return super(FieldArray, self).clean(parsed)
 def to_python(self, value):
     """Coerce *value* (None, a CSV string, list, or tuple) into a list of
     items converted by the underlying field type; anything else raises
     ValidationError.
     """
     if value is None:
         return value
     if isinstance(value, basestring):
         value = csv_to_list(value)
     if not isinstance(value, (list, tuple)):
         raise ValidationError(default_error_messages['invalid'])
     items = list(value)
     if not self._fieldtype.blank:
         # drop falsy entries when blanks are not allowed
         items = [item for item in items if item]
     if self._fieldtype.unique:
         # de-duplicate; note set() does not preserve the original order
         items = list(set(items))
     convert = self._fieldtype.to_python
     return [convert(item) for item in items]
Example #5
0
    def to_python(self, value):
        """Turn *value* into a list of field-typed items.

        None passes through; a CSV string is split first; a list/tuple is
        filtered/de-duplicated per the field type's blank/unique settings and
        each element converted. Any other type raises ValidationError.
        """
        if value is None:
            return value
        if isinstance(value, basestring):
            value = csv_to_list(value)
        if isinstance(value, (list, tuple)):
            # drop falsy entries unless blanks are allowed
            cleaned = value if self._fieldtype.blank else [v for v in value if v]
            if self._fieldtype.unique:
                # de-duplicate; set() does not preserve order
                cleaned = list(set(cleaned))
            return [self._fieldtype.to_python(v) for v in cleaned]

        raise ValidationError(default_error_messages['invalid'])
Example #6
0
def main(path_inputxt, path_outputyt, prefix_outdir, max_l):
    """ Run CSSR, transCSSR, and transCSSR_bc on the given input files and
        generate output files and conditional measures in the given directory.
    Arguments:
        path_inputxt: str
            Path to the inputXt csv file. E.g.: 'csv/inputXt.csv'
        path_outputyt: str
            Path to the outputYt csv file. E.g.: 'csv/outputYt.csv'
        prefix_outdir: str
            Prefix naming of the output directory. Actual output directory will
            be appended with @max_l.
            E.g.: 'output_trans' with @max_l=1 will have outputs under
                './output_trans_L1'
        max_l: int
            The maximum L value for computing the transducer.
    Returns:
        None. Writes '<prefix_outdir>_L<max_l>/results.csv' and archives the
        intermediate .dot/.dat_results files under
        '<prefix_outdir>_L<max_l>/dot_results'.
    """
    # create the final output directory
    dir_out = prefix_outdir + '_L' + str(max_l)
    path_dot_results = os.path.join(dir_out, 'dot_results')
    os.makedirs(path_dot_results, exist_ok=True)

    def _stash(fname, prefix):
        # Move a transCSSR output file out of 'transCSSR_results' into the
        # archive dir, swapping '+' for '_' so e.g. '+X1.dot' -> 'mach__X1.dot'
        # is a portable filename. Also clears the way for the next run that
        # would otherwise overwrite the same filename.
        os.rename(
            join('transCSSR_results', fname),
            join(path_dot_results, prefix + fname.replace('+', '_'))
        )

    # load in the .csv as column-major 2d-lists
    cols_x = csv_to_list(path_inputxt)
    cols_y = csv_to_list(path_outputyt)
    # Find a set of unique outcomes (the alphabet) for each of x & y
    axs = get_uniques_from_2d_list(cols_x)
    ays = get_uniques_from_2d_list(cols_y)

    # List to save C_X & h_X for each pair so we can write to results.csv later
    results = []
    # Headers of CSV:
    header = [
        'machine_name', 'machine_H[X_{0}]', "machine_E", 'machine_C_mu',
        'machine_h_mu',
        'transducer_name', 'transducer_C_mu', 'transducer_h_mu',
        'transducer_bc_name', 'transducer_bc_C_mu', 'transducer_bc_h_mu'

    ]

    # Loop through each pair of columns in the CSVs
    for col_x, col_y in zip(cols_x, cols_y):
        xt_name, yt_name = col_x[0], col_y[0]
        name_machine = '+%s.dot' % xt_name
        name_transducer = '%s+%s.dot' % (xt_name, yt_name)
        # companion '.dat_results' files share the stem of the .dot files
        name_machine_dat = name_machine[:-4] + '.dat_results'
        name_transducer_dat = name_transducer[:-4] + '.dat_results'

        # column rows 1..n are the symbol sequences; row 0 is the name
        stringX, stringY = ''.join(col_x[1:]), ''.join(col_y[1:])
        cssr(stringX, axs, xt_name, max_l)
        HLs, _, h_mu_mach, _, E, C_mu_mach, _ = tC_bc.compute_ict_measures(
            join('transCSSR_results', name_machine),
            axs, 'transCSSR', L_max=max_l
        )

        # generate output files for transducer without BC
        trans(stringX, stringY, axs, ays, xt_name, yt_name, max_l)
        C_mu, h_mu = tC.compute_conditional_measures(
            join('transCSSR_results', name_machine),
            join('transCSSR_results', name_transducer),
            axs, ays, inf_alg='transCSSR')
        # Move transducer output files out of the way so we can generate
        # trans_bc with the same filenames ('X1+Y1.dot' & 'X1+Y1.dat_results').
        _stash(name_transducer, 'trans_')
        _stash(name_transducer_dat, 'trans_')

        # Now that trans outputs are moved, let's generate trans_bc
        trans_bc(stringX, stringY, axs, ays, xt_name, yt_name, max_l)
        C_mu_bc, h_mu_bc = tC.compute_conditional_measures(
            join('transCSSR_results', name_machine),
            join('transCSSR_results', name_transducer),
            axs, ays, inf_alg='transCSSR')
        # Move them away too but with a '_bc' in the prefix
        _stash(name_transducer, 'trans_bc_')
        _stash(name_transducer_dat, 'trans_bc_')

        # Also move away the cssr outputs. e.g.: +X1.dot & +X1.dat_results
        _stash(name_machine, 'mach_')
        _stash(name_machine_dat, 'mach_')

        results.append([
            'mach_' + name_machine.replace('+', '_'),
            HLs[0], E, C_mu_mach, h_mu_mach,
            'trans_' + name_transducer.replace('+', '_'), C_mu, h_mu,
            'trans_bc_' + name_transducer.replace('+', '_'), C_mu_bc, h_mu_bc
        ])

    # Now that we have moved all of the .dot & .dat_results, let's create the
    #   results.csv that has the C_mu & h_mu.
    # newline='' per the csv module docs so csv.writer controls line endings
    # (avoids blank rows on Windows).
    with open(join(dir_out, 'results.csv'), 'w', newline='') as f:
        # QUOTE_NONNUMERIC (== 2, the old magic constant) quotes every
        # non-numeric field
        writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
        writer.writerow(header)
        writer.writerows(results)
Example #7
0
def main(path_outputyt, prefix_outdir, max_l):
    """ Run CSSR on the given input files and generate output files and
        conditional measures in the given directory.
    Arguments:
        path_outputyt: str
            Path to the outputYt csv file. E.g.: 'csv/outputYt.csv'
        prefix_outdir: str
            Prefix naming of the output directory. Actual output directory will
            be appended with @max_l.
            E.g.: 'output_trans' with @max_l=1 will have outputs under
                './output_trans_L1'
        max_l: int
            The maximum L value for computing the transducer.
    Returns:
        None. Writes '<prefix_outdir>_L<max_l>/results.csv' and archives the
        intermediate .dot/.dat_results files under
        '<prefix_outdir>_L<max_l>/dot_results'.
    """
    # create the final output directory
    dir_out = prefix_outdir + '_L' + str(max_l)
    path_dot_results = os.path.join(dir_out, 'dot_results')
    os.makedirs(path_dot_results, exist_ok=True)
    # load in the .csv as column-major 2d-lists
    cols_y = csv_to_list(path_outputyt)
    # Find a set of unique outcomes (the alphabet) for y
    ays = get_uniques_from_2d_list(cols_y)

    # List to save C_X & h_X for each column so we can write results.csv later
    results = []
    # Headers of CSV:
    header = [
        'machine_name',
        'machine_H[X_{0}]',
        "machine_E",
        'machine_C_mu',
        'machine_h_mu',
    ]
    # Loop through each column in the CSV
    for col_y in cols_y:
        # row 0 is the column name; rows 1..n are the symbol sequence
        yt_name = col_y[0]
        name_machine = '+%s.dot' % yt_name
        # companion '.dat_results' file shares the stem of the .dot file
        name_machine_dat = name_machine[:-4] + '.dat_results'

        stringY = ''.join(col_y[1:])
        cssr(stringY, ays, yt_name, max_l)
        HLs, _, h_mu_mach, _, E, C_mu_mach, _ = compute_ict_measures(
            join('transCSSR_results', name_machine),
            ays,
            'transCSSR',
            L_max=max_l)
        # Move away the cssr outputs (e.g. +Y1.dot & +Y1.dat_results) into
        # the archive dir, swapping '+' for '_' for portable filenames.
        for fname in (name_machine, name_machine_dat):
            os.rename(
                join('transCSSR_results', fname),
                join(path_dot_results, 'mach_' + fname.replace('+', '_')))
        results.append([
            'mach_' + name_machine.replace('+', '_'),
            HLs[0],
            E,
            C_mu_mach,
            h_mu_mach,
        ])

    # Now that we have moved all of the .dot & .dat_results, let's create the
    #   results.csv that has the C_mu & h_mu.
    # newline='' per the csv module docs so csv.writer controls line endings
    # (avoids blank rows on Windows).
    with open(join(dir_out, 'results.csv'), 'w', newline='') as f:
        # QUOTE_NONNUMERIC (== 2, the old magic constant) quotes every
        # non-numeric field
        writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
        writer.writerow(header)
        writer.writerows(results)