Example 1
    def test_watch(self):
        seq = [1, 2, 3, 4, 5]
        w = lambda: 1
        bar = frogress.bar(seq, watch=w)
        self.assertIsInstance(bar, frogress.WatchBar)
        self.assertEqual(bar.watch, w)
        self.assertIs(bar.iterable, seq)
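Note: the test above shows that passing a watch callable makes frogress.bar return a WatchBar that keeps references to both the callable and the iterable. A minimal, hypothetical sketch of driving that callable from an external counter (the processed list and the task names are placeholders, not part of the original test):

import frogress

processed = []
tasks = ['a.csv', 'b.csv', 'c.csv']

# the bar can query progress through the watch callable while the loop runs
for task in frogress.bar(tasks, watch=lambda: len(processed)):
    # the real per-task work would go here
    processed.append(task)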
Example 2
def execute():
    with global_connection():
        result = []
        for doc in frogress.bar(load_documents(Courts.CONSTITUTIONAL)):
            doc_info = doc.content_info
            result.append(doc_info)
    output.save_csv(pd.DataFrame(result), 'data')
Example 3
def main():
    if args.file:
        check_available_storage()
        output = ''
        if not args.quiet:
            print('{} {}'.format(name, __version__))
            print('Generiere {} ({})'.format(args.file,
                                             sizeof_fmt(total_vcard_size)))
            for i in frogress.bar(range(args.count),
                                  steps=args.count,
                                  widgets=widgets):
                output += generate_vcard()
                if (i % 250) == 0 or i == args.count - 1:  # flush to file every 250 vcards
                    with open(args.file, mode='a', encoding='utf-8') as f:
                        f.write(output)
                        output = ''
            print("\n")
        else:
            for i in range(args.count):
                output += generate_vcard()
                if (i % 250) == 0 or i == args.count - 1:  # flush to file every 250 vcards
                    with open(args.file, mode='a', encoding='utf-8') as f:
                        f.write(output)
                        output = ''
    else:
        for i in range(args.count):
            print(generate_vcard(), end='')
Example 4
def download_images(image_data, n_images, output_dir):
    """
    Download a specified number of images to output_dir.

    :param _elementtree._element_iterator image_data: information to
                                                      download images
    :param int n_images: number of images to download
    :param str output_dir: directory to store the images
    """

    urls = (make_thumb_url(image) for image in image_data)
    reqs = (grequests.get(url) for url in urls)
    responses = grequests.imap(reqs)

    responses = frogress.bar(responses, steps=n_images)
    print('\nDownloading {} images'.format(n_images))

    os.makedirs(output_dir, exist_ok=True)

    for r in responses:
        try:
            url = urllib.parse.urlparse(r.url)
            filename, _ = os.path.splitext(os.path.basename(url.path))
            output_file_path = os.path.join(output_dir, filename + '.jpg')
            with open(output_file_path, 'wb') as output_file:
                output_file.write(r.content)
        finally:
            r.close()
Example 5
    def get_hash(self, filename):
        #hasher = hashlib.md5()
        hasher = xxhash.xxh64()

        widgets = [
            frogress.PercentageWidget, frogress.BarWidget,
            frogress.TransferWidget(filename + ' '), frogress.EtaWidget,
            frogress.TimerWidget
        ]
        filesize = self._get_file_size(filename)
        try:
            with open(filename, 'rb') as f:
                chunks = self._iter_file(f, self.blocksize)
                if filesize == 0:
                    for chunk in chunks:
                        hasher.update(chunk)
                    print(
                        "100.0%% | [##########] | %s 0-bytes | ETA: -- | Time: 0.0s"
                        % filename)
                else:
                    for chunk in frogress.bar(chunks,
                                              source=f,
                                              widgets=widgets):
                        hasher.update(chunk)
                    print()
        except IOError as e:
            return None

        return hasher.hexdigest()
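Note: Examples 5, 16 and 20 suggest that the widgets argument accepts a list mixing widget classes and widget instances (BarWidget, PercentageWidget, TransferWidget, EtaWidget, TimerWidget, ProgressWidget). A minimal sketch combining some of them over a plain range, with count as a placeholder:

import frogress

count = 100
widgets = [frogress.BarWidget, frogress.PercentageWidget,
           frogress.EtaWidget, frogress.TimerWidget]

# steps tells the bar how many iterations to expect
for i in frogress.bar(range(count), steps=count, widgets=widgets):
    pass  # the work for step i goes here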
Example 6
    def brute_force(self):
        """
        Experimental method that runs a brute-force approach for
        decrypting text encrypted with the Vigenere cipher.
        """
        self.logger.info("Trying to decrypt text '{}' by brute force".format(self._encrypted_text))

        if self._key_len:
            possible_key_lens = [self._key_len]

        else:
            possible_key_lens = self._get_possible_key_lens()
            if not possible_key_lens:
                self.logger.error("No possible key length found (or it is too short); application ends")
                return

        # Increase a threshold in comparison to threshold in GAs.
        threshold_score = (THRESHOLD_SCORE_PER_WORD * 1.25) * len(self._encrypted_text.split())
        final_key = None

        for key_len in possible_key_lens:
            space_dim = 26 ** key_len  # 26 is the size of the alphabet
            population = Population(self._encrypted_text, self._force_train, POPULATION_SIZE, key_len)

            for key in frogress.bar(self._bruteforce_keys_generation(key_len), steps=space_dim):
                score = population.evaluate_fitness_of_member(key)
                self.logger.info("Key '{}' (score: {})".format(key, score))
                if score > threshold_score:
                    final_key = key
                    break

        final_plaintext = Vigenere(final_key).decipher(self._encrypted_text)

        self.logger.info("Final key: '{}'".format(final_key))
        self.logger.info("Final plaintext: '{}'".format(self._correct_plaintext(final_plaintext)))
Example 7
def part1():
    print("Reading dustmap...")
    dust = pd.read_csv(dust_path, sep = ' ', usecols = [0,1,-1], names = ['procell','skycell','SDSS_i_avg'], header = None)
    dustprocells = np.asarray(dust["procell"].values).astype(int)
    dustskycells = np.asarray(dust["skycell"].values).astype(int)
    dust_avg = np.asarray(dust["SDSS_i_avg"].values)

    print("Reading Mag lim catalog")
    data = Table.read(cut_path, format = 'fits')
    data = data.to_pandas()
    skycells = np.asarray(data["skyCellID"].values).astype(int)
    procells = np.asarray(data["projectionid"].values).astype(int)
    mag = np.asarray(data["max_iKronMag"].values)

    for ii in frogress.bar(range(skycells.shape[0])):
        skycell = skycells[ii]
        procell = procells[ii]
        idx = np.where((dustprocells == procell) & (dustskycells == skycell))[0]
        if idx.size != 1:
            print(idx, procell, skycell)
            print("Failed")
            break
        if dust_avg[idx] > -20.0:
            mag[ii] -= dust_avg[idx]
        else:
            print("No Mask")
            mag[ii] = 0.0
    idx = (mag < mag_cut)
    outpro = procells[idx]
    outpro = outpro.reshape((outpro.size, 1))
    outsky = skycells[idx]
    outsky = outsky.reshape((outsky.size, 1))
    inpu = np.hstack((outpro, outsky))

    print("Calculating ra, dec borders...")
    outpro = inpu[:,0]
    outsky = inpu[:,1]
    minras, maxras, mindecs, maxdecs = tools.get_centers(outpro, outsky, borders = True)
    print("Check fully missing masks...")
    idx = np.where(np.abs(minras - (-99)) < 0.0001)[0]
    idbroken = np.where(np.abs(minras - (-999) ) <0.0001)[0]
    print("There are %s masks that cannot be processed" % idbroken.size)
    brokenpro = np.take(outpro, idx)
    startcell, M, zone, xsub, ysub, xsize, ysize, mindec, maxdec, prodec = tools.get_cell_info(brokenpro)
    step = 360.0/M
    cen = step*(brokenpro - startcell)
    highra = cen + step/2.0
    lowra = cen - step/2.0
    bx = lowra < 0.0
    lowra[bx] = lowra[bx] + 360.0
    minras[idx] = lowra
    mindecs[idx] = mindec
    maxras[idx] = highra
    maxdecs[idx] = maxdec

    output = pd.DataFrame({'pro':outpro,'sky':outsky,'minras':minras,'maxras':maxras,'mindecs':mindecs,'maxdecs':maxdecs})

    output.to_csv(outfile_1, index = False)
Example 8
def part2(rank):

    steps = 2643 - 635
    chunky = int(steps/size)
    rest = steps - chunky*size
    mini = chunky*rank
    maxi = chunky*(rank + 1)
    if rank >= (size - 1) - rest:
        maxi += 2 + rank - size + rest
        mini += rank - size + 1 + rest
    if rank == size - 1:
        maxi = steps + 1
    mini += 635
    maxi += 635
    mini = int(mini)
    maxi = int(maxi)


    bad_pixels = np.zeros(0)
    for pro in frogress.bar(range(mini, maxi)):
        flag = False
        for i in range(0, 100):
            try:
                foohdu = fits.open("%s/mask_%04d.%03d.fits" % (mask_directory, pro, i), ignore_missing_end = True)
                break
            except:
                pass
            if i == 99:
                flag = True
        if flag:
            continue
        w = wcs.WCS(foohdu[1].header)
        for sky in range(0, 100):
            try:
                mask = fits.open('%s/mask_%04d.%03d.fits' % (mask_directory, pro, sky), ignore_missing_end = True)
                body = mask[1].data
                width = body.shape[1]
                height = body.shape[0]
            except:
                continue
            #cut off borders of skycell
            body = body[20:-20,20:-20]
            badpix = np.where(body != 0)
            bady = badpix[0]
            badx = badpix[1]
            bady += 20
            badx += 20
            world = w.all_pix2world(badx, bady, 0, ra_dec_order = True)
            ra = np.radians(world[0])
            dec = np.radians(np.subtract(90.0, world[1]))
            pix = hp.pixelfunc.ang2pix(NSIDE, dec, ra)
            pix = np.unique(pix)
            pix = pix.astype(int)
            bad_pixels = np.append(bad_pixels, pix)

    bad_pixels = np.unique(bad_pixels)
    np.savetxt("%s/mask_pixel_save_%s.dat" % (mid_directory, rank), bad_pixels)
Example 9
def main():
    args = parse_args()

    db_conn = pymysql.connect(**args.mysql_url)

    insert_tpl = '''
    INSERT INTO `mag_papers` (
        `paper_id`,
        `original_paper_title`,
        `normalized_paper_title`,
        `paper_publish_year`,
        `paper_publish_date`,
        `paper_doi`,
        `original_venue_name`,
        `normalized_venue_name`,
        `journal_id_mapped_to_venue_name`,
        `conference_series_id_mapped_to_venue_name`,
        `paper_rank`
    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    '''

    if args.create_tables:
        print('Creating tables and indexes')
        create_tables_and_indexes(db_conn.cursor())
        db_conn.commit()

    print('Reading', args.input_csv, '...')
    input_file = utils.open_compressed_file(args.input_csv)
    cursor = db_conn.cursor()
    with input_file, cursor:
        csvreader = csv.reader(
            input_file,
            delimiter='\t',
            quoting=csv.QUOTE_NONE,
        )
        records = (parse_papers_record(r) for r in csvreader)

        records_truncated = ((
            r.paper_id[:50],
            r.original_paper_title[:255],
            r.normalized_paper_title[:255],
            r.paper_publish_year,
            r.paper_publish_date,
            r.paper_doi[:255],
            r.original_venue_name[:255],
            r.normalized_venue_name[:255],
            r.journal_id_mapped_to_venue_name[:255],
            r.converence_series_id_mapped_to_venue_name[:255],
            r.paper_rank,
        ) for r in records)

        records_with_progress = frogress.bar(
            records_truncated,
            steps=args.expected_records,
        )
        cursor.executemany(insert_tpl, records_with_progress)
    db_conn.commit()
Example 10
def main():
    output = ''
    count = 1000
    widgets = [frogress.BarWidget, frogress.PercentageWidget, frogress.ProgressWidget('vCard ')]
    for i in frogress.bar(range(count), steps=count, widgets=widgets):
        output += generate_vCard()

    with codecs.open('output.vcf', 'w', 'utf-8') as f:
        f.write(output)
Example 11
    def test_file(self):
        with tempfile.NamedTemporaryFile('w') as tmp:
            text = 'foobar\n' * 25
            tmp.write(text)
            tmp.flush()
            f = open(tmp.name)
            bar = frogress.bar([], source=f)
            self.assertIsInstance(bar, frogress.TransferBar)
            self.assertEqual(bar.step_callback, f.tell)
            self.assertEqual(bar.steps, len(text))
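Note: the test above shows that passing an open file as source yields a TransferBar whose step count is taken from the file size and whose progress is read through f.tell(). A small sketch, modelled on Example 5, of wrapping chunked reads this way ('data.bin' and the chunk size are placeholders):

import frogress

def iter_chunks(f, size=64 * 1024):
    # yield successive blocks until EOF
    while True:
        chunk = f.read(size)
        if not chunk:
            break
        yield chunk

with open('data.bin', 'rb') as f:
    for chunk in frogress.bar(iter_chunks(f), source=f):
        pass  # feed each chunk to a hasher, uploader, etc.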
Example 13
def bin_redshifts(redshift, min_z, max_z, z_steps):
    z_edges = np.linspace(min_z, max_z, num=z_steps)
    bin_num = np.zeros(redshift.size)
    for ii, z in frogress.bar(enumerate(redshift)):
        it = 0
        upper_z = z_edges[it]
        while z > upper_z:
            it += 1
            upper_z = z_edges[it]
        bin_num[ii] = it
    return bin_num
Example 14
def main(path_input):

    # read the original fragments file
    df = pd.read_csv(path_input,
                     sep="\t",
                     header=None,
                     names=["chr", "start", "end", "cb", "counts"])

    # generate a table describing each chromosome's length
    chrs = "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chrX chrY chrM"
    ends = "197195432 181748087 159599783 155630120 152537259 149517037 152524553 131738871 124076172 129993255 121843856 121257530 120284312 125194864 103494974 98319150 95272651 90772031 61342430 166650296 15902555 16299"

    chrs = chrs.split(" ")
    ends = ends.split(" ")

    df_contigs = pd.DataFrame({"chromosome": chrs, "len": ends})

    df_contigs.len = df_contigs.len.astype(int)

    # find problematic fragments that exceed contig size (due to cell ranger bug)
    problematics = []

    with pysam.Tabixfile(path_input) as tbx:
        for i, contig in df_contigs[
                df_contigs.chromosome != "chrM"].iterrows():

            for row in tbx.fetch(contig.chromosome,
                                 contig.len - 1,
                                 contig.len,
                                 parser=pysam.asBed()):
                problematics.append(row)

    # write to problematics.csv
    df_problematics = pd.DataFrame(
        map(lambda item: (item.contig, item.start, item.end, item.name),
            problematics))
    df_problematics.to_csv("problematics.csv",
                           sep="\t",
                           header=False,
                           index=False)

    # iterate through problematic fragments and correct the coordinates
    for p in bar(problematics):

        # retrieve the proper end of the contig
        proper_end = df_contigs.loc[df_contigs.chromosome == p.contig,
                                    "len"].values[0]

        # correct
        df.loc[(df.chr == p.contig) & (df.start == p.start)
               & (df.cb == p.name), "end"] = proper_end

    # write to disk
    df.to_csv("fragments.tsv", sep="\t", header=False, index=False)
Example 15
    def fit(self, train_data, test_data, n_iter=1, prefix=None):
        learning_curve = []

        for i in range(n_iter):
            n_error = 0
            n_predicted = 0

            train_der = GreedyDiarizationErrorRate()

            shuffle(train_data)

            for example in bar(train_data):

                if len(example) == 2:
                    observations, labels = example
                    partial_labels = [None] * len(labels)
                else:
                    observations, labels, partial_labels = example

                predicted = [
                    pl for pl, state in self._infer(observations, labels,
                                                    partial_labels)
                ]
                assert len(predicted) == len(observations)

                train_der(*convert_labels(labels, predicted))
                err, length = binary_loss_with_matching(predicted,
                                                        labels,
                                                        return_rational=True)

                n_predicted += length
                n_error += err

            print("iteration {}".format(i))
            print("error: {:.2%}/{:.2%}".format(n_error / n_predicted,
                                                abs(train_der)))

            w = deepcopy(self.model.weight)
            self.model.avg()
            test_loss, test_der = self.test(
                test_data,
                dump=prefix.format(i) if prefix is not None else None)
            print("test: {:.2%}/{:.2%}".format(test_loss, test_der))

            self.model.weight = w

            learning_curve.append({
                "train_loss": n_error / n_predicted,
                "train_der": abs(train_der),
                "test_loss": test_loss,
                "test_der": test_der
            })
        return learning_curve
Example 16
def main():
    output = ''
    count = 1000
    widgets = [
        frogress.BarWidget, frogress.PercentageWidget,
        frogress.ProgressWidget('vCard ')
    ]
    for i in frogress.bar(range(count), steps=count, widgets=widgets):
        output += generate_vCard()

    with codecs.open('output.vcf', 'w', 'utf-8') as f:
        f.write(output)
Example 17
def parse_file(xml_file, db):

    context = etree.iterparse(xml_file, tag=oca_tag('Index'))

    for action, case in frogress.bar(context):

        # If case already exists in DB delete it,
        # if we have delete instructions don't re-add it,
        # otherwise parse the case and insert it into the various tables.
        parse_case(case, db)

        # Clear the case element to free memory
        case.clear()
Example 18
def show_progress(title, iterable, **kwargs):
    sleep = kwargs.pop('sleep', 0.01)
    timeout = kwargs.pop('timeout', 2)
    line = '  %s  ' % title
    print(line.center(80, '=') + '\n')
    started = datetime.datetime.now()
    for item in frogress.bar(iterable, **kwargs):
        if sleep:
            time.sleep(sleep)
        if (datetime.datetime.now() - started).seconds > timeout:
            print('\n[Timeout reached: %.1fs]' % timeout)
            break
    print('\n')
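Note: hypothetical calls to the show_progress helper defined above; sleep and timeout are consumed by the helper itself, while any remaining keyword arguments (such as steps) are passed straight to frogress.bar:

# a bounded iterable with a known number of steps
show_progress('Known number of steps', range(500), steps=500)

# an endless iterator of zeros; the helper's timeout stops it after about 2 seconds
show_progress('Unknown number of steps', iter(int, 1), sleep=0.001)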
Example 19
    def test_watch_file(self):
        seq = [1, 2, 3, 4, 5]
        w = lambda: 1
        with tempfile.NamedTemporaryFile('w') as tmp:
            text = 'foobar\n' * 25
            tmp.write(text)
            tmp.flush()
            f = open(tmp.name)
            bar = frogress.bar(seq, watch=w, source=f)
            self.assertIsInstance(bar, frogress.TransferWatchBar)
            self.assertIs(bar.watch, w)
            self.assertEqual(bar.step_callback, f.tell)
            self.assertEqual(bar.steps, len(text))
            self.assertIs(bar.iterable, seq)
Example 20
def main():
    output = ''

    # generate the specified number of vCards
    if not args.quiet:
        for i in frogress.bar(range(args.count), steps=args.count, widgets=widgets):
            output += generate_vcard()
        print("\n")
    else:
        for i in range(args.count):
            output += generate_vcard()

    # write the vCards to a file
    with open(args.filename, 'w') as f:
        f.write(output)
Example 21
def clean_index(dirname):
    # Always create the index from scratch
    if not os.path.exists(dirname):
        os.mkdir(dirname)
    print(dirname, get_schema())
    ix = index.create_in(dirname, get_schema())
    writer = ix.writer()

    doclist = [f for f in listdir(datapath) if isfile(join(datapath, f))]
    # Assume we have a function that gathers the filenames of the
    # documents to be indexed
    #for path in my_docs():
    l = len(doclist)
    #for i, filename in enumerate(doclist):
    for filename in frogress.bar(doclist):
        add_doc(writer, u""+datapath + filename)
    writer.commit()
Example 22
def get_perturbed_magnitudes():
    # Read the matched file for SDSS z and PS magnitudes
    df = pandas.read_csv(
        "/work/dominik.zuercher/Output/match_PS_GAMA/matched_spec_new.dat",
        delim_whitespace=1,
        usecols=(0, 3, 4, 5, 6),
        header=None,
        names=(["zred", "rPS", "gPS", "iPS", "blue"]))

    # Read in Dominik's median errors as a function of g band magnitude and then perturb
    gmag, gmag_err = np.loadtxt("/work/dominik.zuercher/g_band_error.txt",
                                unpack=1)
    rmag, rmag_err = np.loadtxt("/work/dominik.zuercher/r_band_error.txt",
                                unpack=1)

    gspl = UnivariateSpline(gmag, np.log10(gmag_err))
    rspl = UnivariateSpline(rmag, np.log10(rmag_err))

    # Check the splines
    # xx = np.arange(15.0, 24.0, 0.01)
    # np.savetxt("test_gspl.dat", zip(xx, 10.0**gspl(xx)))
    # np.savetxt("test_rspl.dat", zip(xx, 10.0**rspl(xx)))

    df["gPS_pert"] = df.zred.values * 0.0
    df["rPS_pert"] = df.zred.values * 0.0
    import frogress
    for ii in frogress.bar(range(df.zred.size)):
        # First get the apparent magnitude at the redshift
        appmag_i = get_app_magnitude(df.zred.values[ii])

        appmag_g = df.gPS.values[ii] + (appmag_i - df.iPS.values[ii])
        appmag_r = df.rPS.values[ii] + (appmag_i - df.iPS.values[ii])

        # Now perturb according to the errors
        g_err = 10.**gspl(appmag_g)
        r_err = 10.**rspl(appmag_r)

        appmag_g += np.random.normal() * g_err
        appmag_r += np.random.normal() * r_err

        df.gPS_pert.values[ii] = appmag_g
        df.rPS_pert.values[ii] = appmag_r

    df.to_csv("Perturbed_match.dat", sep=" ", index=False)
Example 23
    def _run_evolution(self, key_len):
        """
        Method for running evolution process. It works with 1 set of probable solutions (determined by key_len).

        :param int key_len: a length of key (solution - population member)
        :return: fittest member with its statistics
        :rtype: OrderedDict
        """
        self.logger.info("Evolution started")

        fittest_member_stats = OrderedDict()

        population = Population(self._encrypted_text, self._force_train, POPULATION_SIZE, key_len)
        population.init_new()

        threshold_score = THRESHOLD_SCORE_PER_WORD * len(self._encrypted_text.split())

        for gen_nr in frogress.bar(range(1, MAX_GENERATION_AMOUNT + 1)):
            population.calculate_fitness()

            self.logger.debug("Generation number {}.".format(gen_nr))
            self.logger.debug(population)  # change this line if you want to see each member of the population with its score!

            member, score = population.get_fittest_member()

            if gen_nr == MAX_GENERATION_AMOUNT or score >= threshold_score:
                self.logger.info(
                    "Fittest member from all generations is: '{}' (score: {}, from generation: {})".format(
                        member, score, gen_nr)
                )
                fittest_member_stats["member"] = member
                fittest_member_stats["fitness_score"] = score
                fittest_member_stats["generation"] = gen_nr
                fittest_member_stats["key_len"] = key_len
                fittest_member_stats["reached_threshold"] = True if score >= threshold_score else False
                return fittest_member_stats

            # Forming new population - picking pairs of parents, do crossover and do mutation.
            parent_pairs = population.perform_roulette_wheel()

            # crossovered_pairs = population.perform_1p_crossover(parent_pairs)
            crossovered_pairs = population.perform_2p_crossover(parent_pairs)
            population.perform_mutation(crossovered_pairs)
Example 24
def get_cuts(color, bin_num, bins, z_steps, confidence, include_PS_errors):
    cuts = np.zeros(z_steps - 1)
    for it in frogress.bar(range(1, z_steps)):
        colors = color[bin_num == it]
        hist, edges = np.histogram(colors, bins=bins)
        try:
            popt, pcov = curve_fit(double_gaussian,
                                   edges[:-1] + (edges[1] - edges[0]) / 2.,
                                   hist)
        except:
            cuts[it - 1] = -1000
            continue
        red_ind = np.argmax([popt[1], popt[4]])
        if red_ind == 0:
            red_mu = popt[1]
            red_sigma = popt[2]
        else:
            red_mu = popt[4]
            red_sigma = popt[5]

        if include_PS_errors == False:
            cut = red_mu - confidence * np.abs(red_sigma)
        else:
            cut = red_mu - confidence * np.sqrt(red_sigma**2.0 + red_err**2.0 +
                                                green_err**2.0)

        cuts[it - 1] = cut

        if (it % 10 == 0):
            plt.figure(it / 10)
            plt.hist(colors, bins=bins)
            gr_range = edges[:-1] + (edges[1] - edges[0]) / 2.
            plt.plot(gr_range,
                     double_gaussian(gr_range, popt[0], popt[1], popt[2],
                                     popt[3], popt[4], popt[5]),
                     'k-',
                     lw=0.3)
            plt.axvline(cut, color='r', lw=0.3)
            plt.savefig("%s/test_hist_%s.pdf" % (output_dir, it / 10))
    return cuts
Example 25
def get_cuts(color, bin_num, bins, z_steps, confidence, include_PS_errors):
    cuts = np.zeros(z_steps - 1)
    for it in frogress.bar(range(1, z_steps)):
        colors = color[bin_num == it]
        hist, edges = np.histogram(colors, bins=bins)
        try:
            popt, pcov = curve_fit(double_gaussian,
                                   edges[:-1] + (edges[1] - edges[0]) / 2.,
                                   hist)
        except:
            cuts[it - 1] = -1000
            continue

        red_ind = np.argmax([popt[1], popt[4]])
        if red_ind == 0:
            red_mu = popt[1]
            blue_mu = popt[4]
            red_sigma = popt[2]
        else:
            red_mu = popt[4]
            blue_mu = popt[1]
            red_sigma = popt[5]

        xx = edges[:-1] + (edges[1] - edges[0]) / 2.

        res = double_gaussian(xx, popt[0], popt[1], popt[2], popt[3], popt[4],
                              popt[5])
        cut = xx[np.argmin(res[(xx < red_mu) & (xx > blue_mu)])]

        cuts[it - 1] = cut
        """
	plt.figure(it)
	plt.hist(colors, bins = bins)
	gr_range = edges[:-1] + (edges[1] - edges[0])/2.
	plt.plot(gr_range, double_gaussian(gr_range, popt[0], popt[1], popt[2], popt[3], popt[4], popt[5]), 'k-', lw = 0.3)
	plt.axvline(cut, color = 'r', lw = 0.3)
	plt.savefig("%s/test_hist_%s.pdf" % ("/work/dominik.zuercher/Output/match_PS_GAMA", it ))
        """
    return cuts
Example 26
    def tosankey(self):
        '''db to sankey json format.
        needs to be multiprocessed'''

        # define needed constants
        self.constants()

        # :( mp will thro a PicklingError
        # PicklingError: Can't pickle <type 'instancemethod'>: attribute lookup __builtin__.instancemethod failed
        # pool = mp.Pool(processes=4)
        # pool.apply_async(self.profiletosankey, self.browsers)
        # pool.close()
        # pool.join()

        print 'Counting needed values with database commands. Please wait.'
        for b in frogress.bar(self.browsers):
            self.profiletosankey(b)

        # write to the dictionary and json
        print '\nWriting nodes to json.'
        nodesncv = self.nodemaker(self.nonconversion, self.ncv[0])
        nodescv = self.nodemaker(self.conversion, self.cv[0])
        self.nodes = nodesncv + nodescv

        print 'Writing links to json.'
        linksncv = self.linkmaker(self.nonconversion)
        linkscv = self.linkmaker(self.conversion)
        self.links = linksncv + linkscv

        print 'The final file is at %s' % self.jsonfile

        def writejson(links, nodes, outfile):
            '''lol so much repeating and helper functions'''
            linksandnodes = {"links": links, "nodes": nodes}
            with open(outfile, 'w') as f:
                json.dump(linksandnodes, f)

        writejson(self.links, self.nodes, self.jsonfile)
Example 27
    def tosankey(self):
        '''db to sankey json format.
        needs to be multiprocessed'''

        # define needed constants
        self.constants()

        # :( mp will thro a PicklingError
        # PicklingError: Can't pickle <type 'instancemethod'>: attribute lookup __builtin__.instancemethod failed
        # pool = mp.Pool(processes=4)
        # pool.apply_async(self.profiletosankey, self.browsers)
        # pool.close()
        # pool.join()

        print 'Counting needed values with database commands. Please wait.'
        for b in frogress.bar(self.browsers):
            self.profiletosankey(b)

        # write to the dictionary and json
        print '\nWriting nodes to json.'
        nodesncv = self.nodemaker(self.nonconversion, self.ncv[0])
        nodescv  = self.nodemaker(self.conversion, self.cv[0])
        self.nodes = nodesncv + nodescv

        print 'Writing links to json.'
        linksncv = self.linkmaker(self.nonconversion)
        linkscv  = self.linkmaker(self.conversion)
        self.links = linksncv + linkscv
        
        print 'The final file is at %s' % self.jsonfile
        def writejson(links, nodes, outfile):
            '''lol so much repeating and helper functions'''
            linksandnodes = {"links":links, "nodes":nodes}
            with open(outfile, 'w') as f:
                json.dump(linksandnodes, f)

        writejson(self.links, self.nodes, self.jsonfile)
Example 28
def main():
    if args.file:
        check_available_storage()
        output = ''
        if not args.quiet:
            print('{} {}'.format(name, __version__))
            print('Generiere {} ({})'.format(args.file, sizeof_fmt(total_vcard_size)))
            for i in frogress.bar(range(args.count), steps=args.count, widgets=widgets):
                output += generate_vcard()
                if (i % 250) == 0 or i == args.count - 1:  # flush to file every 250 vcards
                    with open(args.file, mode='a', encoding='utf-8') as f:
                        f.write(output)
                        output = ''
            print("\n")
        else:
            for i in range(args.count):
                output += generate_vcard()
                if (i % 250) == 0 or i == args.count - 1:  # flush to file every 250 vcards
                    with open(args.file, mode='a', encoding='utf-8') as f:
                        f.write(output)
                        output = ''
    else:
        for i in range(args.count):
            print(generate_vcard(), end='')
Example 29
def main(f_old, f_new, f_out_prefix):

    START_TIME = time.time()

    parser = ET.XMLParser(remove_blank_text=True)
    tree_old = ET.parse(f_old, parser)
    tree_new = ET.parse(f_new, parser)

    all_new_ways = tree_new.findall('way')
    all_old_ways = tree_old.findall('way')

    removeways = []

    # loop through all ways in old file and look for deleted ways
    print('Scanning for deleted ways')
    for i, way_old in frogress.bar(enumerate(all_old_ways), steps=len(all_old_ways)):
        if reportprogress:
            print('{}/{} ways done'.format(i+1, len(all_old_ways)), end='\r')
        transid = way_old.find('tag[@k="TRANSID"]').attrib['v']
        way_new = tree_new.find('way/tag[@k="TRANSID"][@v="' + transid + '"]/..')
        if way_new is not None:
            # corresponding new way exists, so remove it
            removeways.append(way_old)

    # loop through all ways in new file and look for changed/new ways
    print('Scanning for changed/new ways')
    for i, way_new in frogress.bar(enumerate(all_new_ways), steps=len(all_new_ways)):
        if reportprogress:
            print('{}/{} ways done'.format(i+1, len(all_new_ways)), end='\r')

        changes = []

        # TRANSID of the new way
        transid = way_new.find('./tag[@k="TRANSID"]').get('v')

        # way from old file with matching TRANSID
        way_old = tree_old.find('.//way/tag[@k="TRANSID"][@v="' + transid + '"]/..')

        # check if way is new (nonexistent in old)
        if way_old is None:
            ET.SubElement(way_new, 'tag', dict(k='ELVEGDIFF_CHANGES', v='NEW WAY'))
            continue  # skip the rest of the checks

        # check if tags have changed
        changes.extend(tag_changes(way_old, way_new))

        # check if nodes have changed
        nds_old = way_old.findall('nd')
        nds_new = way_new.findall('nd')
        nodes_old = []
        nodes_new = []
        for nds, nodelist in [(nds_old, nodes_old), (nds_new, nodes_new)]:
            for nd in nds:
                for node in nd.getparent().itersiblings(tag='node', preceding=True):
                    if node.get('id') == nd.attrib['ref']:
                        nodelist.append(node)
                        break
        changes.extend(node_changes(nodes_old, nodes_new))

        if changes:
            ET.SubElement(way_new, 'tag', dict(k='ELVEGDIFF_CHANGES', v='\n'.join(changes)))
        else:
            # remove element
            way_new.getparent().remove(way_new)

    # remove the ways to be removed
    for way_old in removeways:
        way_old.getparent().remove(way_old)

    # loop through all childless nodes and check if they're referenced anywhere
    allnodes = tree_old.findall('node') + tree_new.findall('node')
    print('Removing unreferenced nodes without tags')
    for i, node in frogress.bar(enumerate(allnodes), steps=len(allnodes)):

        if reportprogress:
            print('{}/{} nodes done'.format(i+1, len(allnodes)), end='\r')

        if len(node):
            # node has children, skip
            continue

        # assume the node is unreferenced and should be removed,
        # and check for references below
        remove = True

        id_ = node.attrib['id']

        next_ways = node.itersiblings(tag='way')  # node should be referenced here

        # TODO: if it is possible that nodes are referenced ABOVE where they are in the file,
        # include the following line and use itertools.chain to chain next_ways and prev_ways
        #prev_ways = node.itersiblings(tag='way', preceding=True)  # included just in case

        for way in next_ways:
            for nd in way.iterchildren(tag='nd'):
                if nd.get('ref') == id_:
                    # <node> is referenced
                    remove = False
                    break

            # if we found a referencing <nd>, break out of loop
            if not remove:
                break

        # if no nd is found referencing this node, remove it
        if remove:
            node.getparent().remove(node)

    print('Scanning for changes in tagged nodes')
    old_node_list = [n for n in tree_old.findall('node') if len(n)]
    new_node_list = [n for n in tree_new.findall('node') if len(n)]
    old_nodes = {}
    new_nodes = {}
    for nodes, dct in [(old_node_list, old_nodes), (new_node_list, new_nodes)]:
        for node in nodes:
            latlon = (node.attrib['lat'], node.attrib['lon'])
            dct[latlon] = node

    # added/deleted nodes
    added_nodes_latlon = set(new_nodes.keys()).difference(old_nodes.keys())
    deleted_nodes_latlon = set(old_nodes.keys()).difference(new_nodes.keys())

    # add change description tag to added nodes
    for latlon in added_nodes_latlon:
        ET.SubElement(new_nodes[latlon], 'tag', dict(k='ELVEGDIFF_CHANGES', v='NEW TAGGED NODE'))

    # changed nodes
    changed_nodes_latlon = []
    for latlon in set(new_nodes.keys()).intersection(old_nodes.keys()):
        changes = tag_changes(old_nodes[latlon], new_nodes[latlon])
        if changes:
            changed_nodes_latlon.append(latlon)
            ET.SubElement(new_nodes[latlon], 'tag', dict(k='ELVEGDIFF_CHANGES', v='\n'.join(changes)))

    # if nodes are neither changed nor added, remove from new file
    unchanged_nodes_latlon = set(new_nodes.keys()).difference(added_nodes_latlon, changed_nodes_latlon)
    for latlon in unchanged_nodes_latlon:
        # TODO: don't remove if it's referenced somewhere
        new_nodes[latlon].getparent().remove(new_nodes[latlon])

    # if nodes are not deleted, remove from old file
    undeleted_nodes_latlon = set(old_nodes.keys()).difference(deleted_nodes_latlon)
    for latlon in undeleted_nodes_latlon:
        # TODO: don't remove if it's referenced somewhere
        old_nodes[latlon].getparent().remove(old_nodes[latlon])

    print('Finished in {:.1f} seconds'.format(time.time()-START_TIME))

    tree_new.write(r'{}changed.osm'.format(f_out_prefix), pretty_print=True, xml_declaration=True, encoding='UTF-8')
    tree_old.write(r'{}deleted.osm'.format(f_out_prefix), pretty_print=True, xml_declaration=True, encoding='UTF-8')
Example 30
    def test_bar_passes_parameters(self):
        seq = [1, 2, 3, 4, 5]
        progressbar = frogress.bar(seq)
        self.assertEqual(progressbar.iterable, seq)
Example 31
def iter(what):
    import frogress
    return frogress.bar(what)
Example 32
def db2json(credentials, name, browsers, operating):
    '''Format data from MySQLdb to Sankey json file, 
    see sankeygreenhouse.json for an example.'''
    # connect
    (MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, MYSQL_DB) = credentials
    db = MySQLdb.connect(host=MYSQL_HOST, passwd=MYSQL_PASSWORD, 
                         user=MYSQL_USER, db=MYSQL_DB)
    cur = db.cursor()

    def repeatcmd(name, *args):
        '''Return int count value.
        Create the command by adding together strings.
        '''
        cmd = 'SELECT COUNT(*) FROM %s' % name
        n = len(args)

        if n/2: cmd += ' WHERE '
        for i in xrange(n): # this makes me unhappy
            if i % 2: continue
            if i != 0: cmd += 'and '
            cmd += '%s = %s ' % (args[i], args[i+1])

        cur.execute(cmd)
        return cur.fetchone()[0]

    # strings
    name    = scrub(name)
    outfile = name + '.json'

    # variables, defaults
    cv,  CV,  conversion,    outcv  = 'conversion',    '1', defaultdict(int), name + 'ncv.json'
    ncv, NCV, nonconversion, outncv = 'nonconversion', '0', defaultdict(int), name + 'cv.json'

    # distinguishing
    s = ' ' # ncv has addl variable s

    # os -> browser -> conversion
    print 'Converting data to links.'
    for b in frogress.bar(browsers):
        # browser to nonconversion
        nonconversion[browsers[b]+s, ncv] += repeatcmd(name, cv, NCV, 'browser', b)

        # browser to conversion
        conversion[browsers[b], cv] += repeatcmd(name, cv, CV, 'browser', b)

        # os to browser
        # differentiating by ncv/cv slows it down by 3 times
        for os in operating:
            # os to browser to nonconversion
            nonconversion[operating[os]+s+browsers[b][0], browsers[b]+s] += \
                            repeatcmd(name, 'OS', os, 'browser', b, cv, NCV)
            # os to browser to conversion
            conversion[operating[os]+browsers[b][0], browsers[b]] += \
                            repeatcmd(name, 'OS', os, 'browser', b, cv, CV)

    # write to the dictionary and json
    print '\nWriting nodes to json.'
    
    def nodemaker(links, extra=[]):
        '''Convert integer 2-keyed links dictionary to nodes list.
        Optional extra (SINGLE) parameter to add.'''
        nodes = [s for (s,t), v in links.items() if v > 0]
        nodes.append(extra)
        nodes = set(nodes)
        return [{"name":n} for n in nodes]

    nodesncv = nodemaker(nonconversion, ncv)
    nodescv  = nodemaker(conversion, cv)
    nodes    = nodesncv + nodescv

    print 'Writing links to json.'
    def linkmaker(links):
        '''Convert integer 2-keyed links dictionary to links list'''
        return [{"source":s, "target":t, "value":v} for (s,t), v in links.items() if v > 0]

    linksncv = linkmaker(nonconversion)
    linkscv  = linkmaker(conversion)
    links    = linksncv + linkscv

    def writejson(links, nodes, outfile):
        '''lol so much repeating and helper functions'''
        linksandnodes = {"links":links, "nodes":nodes}
        with open(outfile, 'w') as f:
            json.dump(linksandnodes, f)

    #writejson(linksncv, nodesncv, outncv)
    #writejson(linkscv, nodescv, outcv)
    writejson(links, nodes, outfile)
Example 33
    def test_context(self):
        with mock.patch('%s.open' % __name__, mock.mock_open(read_data='first line'),
                        create=True) as m:
            with frogress.bar(open('fake_file')) as f:
                result = f.read()
        self.assertEqual(result, 'first line')
Example 34
def iterable2dataframe(iterable, field_name='public_info'):
    result = []
    for row in frogress.bar(iterable):
        result.append(getattr(row, field_name))
    return pd.DataFrame(result)
Example 35
def es_index_construction(relative_list, index_name, corpus):
    '''
    :param relative_list: list of URLs (relative paths) to get traffic for and upload to ElasticSearch
    :param index_name: name of the ElasticSearch index to create
    :param corpus: documents to merge with the traffic data and upload
    :return:
    '''
    def chunks(l, n):
        # For item i in a range that is a length of l,
        for i in range(0, len(l), n):
            # Create an index range for l of n items:
            yield l[i:i + n]

    #iterate through chunks of URLs to get traffic
    appended_data = []
    for url_chunk in frogress.bar(list(chunks(relative_list, 500))):
        urls = format(url_chunk).replace('[', '').replace(']', '')
        query1 = """SELECT relative_path, timelessness, content_type, title, primary_sub_channel, workflow_status,
         avg(entrances) AS avg_organic_entrances_last_3_months
        FROM
          (SELECT content.relative_path,
                 timelessness,
                 content_type,
                 title,
                 content.primary_sub_channel,
                 content.workflow_status,
                 trunc(traffic_date,
                 'mm') AS month, sum(entrances) AS entrances
          FROM investopedia.page_traffic
          LEFT JOIN investopedia.content using(relative_path)
          WHERE content.relative_path IN ({urls})
                  AND traffic_channel = 'organic search'
                  and traffic_date >= '2017-08-01'
                  and traffic_date < '2017-11-01'
          GROUP BY trunc(traffic_date, 'mm'),timelessness, content_type, title, content.relative_path, content.primary_sub_channel, content.workflow_status) traffic
        GROUP BY relative_path, timelessness, content_type, title, primary_sub_channel, workflow_status""".format(
            urls=urls)

        temp_df = run_ibi_query(query1)
        print(len(temp_df))
        appended_data.append(temp_df)

    traffic_df = pd.concat(appended_data, ignore_index=True)

    traffic_df['url'] = traffic_df['relative_path'].apply(
        lambda x: x.strip('/'))

    #merge corpus with traffic_df to include traffic
    corpus_df = pd.DataFrame.from_dict(corpus)
    merged_df = corpus_df.merge(traffic_df[[
        'url', 'avg_organic_entrances_last_3_months', 'primary_sub_channel',
        'workflow_status'
    ]],
                                how='left',
                                on='url',
                                copy=False)
    merged_df.rename(columns={
        'avg_organic_entrances_last_3_months': 'avg_entrances',
        'primary_sub_channel': 'actual_sub_channel'
    },
                     inplace=True)
    merged_df['avg_entrances'] = merged_df['avg_entrances'].apply(
        lambda x: 0 if np.isnan(x) else x)
    merged_df['actual_sub_channel'] = merged_df['actual_sub_channel'].fillna(
        '')
    merged_df = merged_df.fillna(0)
    es_corpus = merged_df.to_dict(orient='records')
    #import pdb;pdb.set_trace()

    #select a subset of keys we wish to upload to ElasticSearch
    es_list = [{
        your_key: dic[your_key]
        for your_key in [
            'advertising_channel', 'author', 'avg_entrances',
            'actual_sub_channel', 'bodyTEXT', 'channel', 'created', 'sitedate',
            'sub_advertising_channel', 'sub_channel', 'subtype', 'summary',
            'syndate', 'timelessness', 'title', 'type', 'updated', 'url',
            'workflow_status'
        ] if your_key in dic
    } for dic in es_corpus]
    for new in es_list:
        if new['syndate'] != None:
            new['syndate'] = datetime.datetime.utcfromtimestamp(
                int(new['syndate'])).strftime('%Y-%m-%dT%H:%M:%S+00:00')
            new['sitedate'] = datetime.datetime.utcfromtimestamp(
                int(new['sitedate'])).strftime('%Y-%m-%dT%H:%M:%S+00:00')
    #run configuration from investopedia_recirc to establish connection to ElasticSearch service
    configuration()
    #create index with provided name
    InvestopediaRecirc.create_index(index_name=index_name,
                                    force=True,
                                    using='default')
    print('index with name: {} created.'.format(index_name))
    #upload subsetted corpus to ES
    InvestopediaRecirc.bulk_update(
        [InvestopediaRecirc(**doc) for doc in es_list])
    print('corpus uploaded to {}.'.format(index_name))
Example 36
    np.savetxt(outdir + "widths.txt", widths)
    np.savetxt(outdir + "heights.txt", heights)
    np.savetxt(outdir + "pros.txt", pros)
    np.savetxt(outdir + "skys.txt", skys)
    np.savetxt(outdir + "outs.txt", outs)

    names = np.genfromtxt(outdir + "names.txt", dtype=str)
    ras = np.genfromtxt(outdir + "ras.txt")
    decs = np.genfromtxt(outdir + "decs.txt")
    widths = np.genfromtxt(outdir + "widths.txt")
    heights = np.genfromtxt(outdir + "heights.txt")
    pros = np.genfromtxt(outdir + "pros.txt")
    skys = np.genfromtxt(outdir + "skys.txt")
    outs = np.genfromtxt(outdir + "outs.txt")

    names = names[it:]
    ras = ras[it:]
    decs = decs[it:]
    widths = widths[it:]
    heights = heights[it:]
    pros = pros[it:]
    skys = skys[it:]
    outs = outs[it:]

    for j in range(len(names)):
        names[j] = str(outdir) + str(os.path.basename(names[j]))

    for i in frogress.bar(range(len(names))):
        tools.edge_correct(names[i], pros[i], skys[i], ras[i], decs[i],
                           outs[i], widths[i], heights[i], indir, outdir)
Example 37
            endung = "." + endung
            break
        elif endung.lower() == "x":
            print "Aufwiedersehen\n"
            exit()
        else:
            print "Bitte Eingabe richtig machen "
    except ValueError:
        print "Bitte richtige Eingabe"
# here the processing of the individual folders begins
path = wd + '/%s'
for Buchstabe in ALPHA:
    if not os.path.exists(path % Buchstabe):
        os.makedirs(path % Buchstabe)
# list all files in the search directory with the txt extension
for file in frogress.bar(os.listdir(hiermachen)):
    # if the filename (automatically lowercased via ".lower()") ends with *.bin
    # and its first letter (uppercased for comparison with ALPHA) file[0].upper() occurs in ALPHA
    if file.lower().endswith(endung.lower()) and file[0].upper() in ALPHA:
        # to save a loop, take the first letter of the file in upper case to find the target directory
        #datei = open("datenbank.csv", "a")
        with open("datenbank.csv", "a") as datei:
            logdatei = open("copy.log", "a")
            Buchstabe = file[0].upper()
            sourcefile = os.path.join(hiermachen, file)
            destinationfile = os.path.join(path % Buchstabe, file)
            # debug output: print "%s ==> %s" % (file, destinationfile)
            shutil.move(sourcefile, destinationfile)
            datei.write("\t%s,\t%s\n" % (file, destinationfile))
            logdatei.write("%s nach %s verschoben\n" % (file, destinationfile))
            logdatei.close()
Example 38
def procedure(rank, size):

    steps = 2643.0 - 635.0
    chunky = int(steps/size)
    rest = steps - chunky*size
    mini = chunky*rank
    maxi = chunky*(rank + 1)
    if rank >= (size - 1 - rest):
        maxi += 2 + rank - size + rest
        mini += rank - size + 1 + rest
    mini += 635
    maxi += 635
    mini = int(mini)
    maxi = int(maxi)

    if color == "iband":
        filter_ = "SDSS i"
    elif color == "red":
        filter_ = "SDSS r"
    elif color == "green":
        filter_ = "SDSS g"
    else:
        print("Defined filter not available. Choose red, green or iband")
        return

    imap = mwdust.Combined15(filter = filter_, sf10 = True)

    print("Creating Grid...")

    foos = np.arange(mini, maxi, 1)
    full = tools.create_grid(foos)
    print("Kernel %s doing %s up to %s" % (rank, mini, maxi))
    print("Calculating borders of the cells")
    minras, maxras, mindecs, maxdecs = tools.get_centers(full[:,0], full[:,1], borders = True, maskdetect = True)

    print("Using %s objects in Kernel %s" % (full.shape[0], rank))

    print("Converting coordinates to galactic system")
    idy = np.where(np.abs(minras + 99.0) < 0.0001)[0]
    idy2 = np.where(np.abs(minras + 999.0) < 0.0001)[0]
    idy = np.append(idy, idy2)
    idy = np.unique(idy)
    for it in idy:
        minras[it] = 0.0
        mindecs[it] = 0.0
        maxras[it] = 0.0
        maxdecs[it] = 0.0
    c_maxs = SkyCoord(ra = maxras*u.degree, dec = maxdecs*u.degree, frame = 'icrs')
    c_mins = SkyCoord(ra = minras*u.degree, dec = mindecs*u.degree, frame = 'icrs')
    c_maxs = c_maxs.galactic
    c_mins = c_mins.galactic
    ramins = c_mins.l.degree
    ramaxs = c_maxs.l.degree
    decmaxs = c_mins.b.degree
    decmins = c_maxs.b.degree
    print("Calculating extinction...")
    dustyness = np.zeros(0)
    for idx in frogress.bar(range(full.shape[0])):
        if idx in idy:
            dustyness = np.append(dustyness, -99.0)
            continue
        dustynesscolor = np.zeros(0)
        for b in np.linspace(decmins[idx], decmaxs[idx], 5):
            for l in np.linspace(ramins[idx], ramaxs[idx], 5):
                if color == "red":
                    foo = rmap(l, b, depth)
                elif color == "iband":
                    foo = imap(l, b, depth)
                elif color == "green":
                    foo = gmap(l, b, depth)
                dustynesscolor = np.append(dustynesscolor, foo)
        dustynesscolor = np.average(dustynesscolor)
        dustyness = np.append(dustyness, dustynesscolor)
    np.savetxt(str(outdir)+str(color)+"parts/dust_catalog_"+str(color)+"_"+str(rank)+".csv", np.hstack((full, dustyness.reshape(dustyness.size, 1))))
Example 40
def db2json(credentials, name, browsers, operating):
    '''Format data from MySQLdb to Sankey json file, 
    see sankeygreenhouse.json for an example.'''
    # connect
    (MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, MYSQL_DB) = credentials
    db = MySQLdb.connect(host=MYSQL_HOST,
                         passwd=MYSQL_PASSWORD,
                         user=MYSQL_USER,
                         db=MYSQL_DB)
    cur = db.cursor()

    def repeatcmd(name, *args):
        '''Return int count value.
        Create the command by adding together strings.
        '''
        cmd = 'SELECT COUNT(*) FROM %s' % name
        n = len(args)

        if n / 2: cmd += ' WHERE '
        for i in xrange(n):  # this makes me unhappy
            if i % 2: continue
            if i != 0: cmd += 'and '
            cmd += '%s = %s ' % (args[i], args[i + 1])

        cur.execute(cmd)
        return cur.fetchone()[0]

    # strings
    name = scrub(name)
    outfile = name + '.json'

    # variables, defaults
    cv, CV, conversion, outcv = 'conversion', '1', defaultdict(
        int), name + 'ncv.json'
    ncv, NCV, nonconversion, outncv = 'nonconversion', '0', defaultdict(
        int), name + 'cv.json'

    # distinguishing
    s = ' '  # ncv has addl variable s

    # os -> browser -> conversion
    print 'Converting data to links.'
    for b in frogress.bar(browsers):
        # browser to nonconversion
        nonconversion[browsers[b] + s,
                      ncv] += repeatcmd(name, cv, NCV, 'browser', b)

        # browser to conversion
        conversion[browsers[b], cv] += repeatcmd(name, cv, CV, 'browser', b)

        # os to browser
        # differentiating by ncv/cv slows it down by 3 times
        for os in operating:
            # os to browser to nonconversion
            nonconversion[operating[os]+s+browsers[b][0], browsers[b]+s] += \
                            repeatcmd(name, 'OS', os, 'browser', b, cv, NCV)
            # os to browser to conversion
            conversion[operating[os]+browsers[b][0], browsers[b]] += \
                            repeatcmd(name, 'OS', os, 'browser', b, cv, CV)

    # write to the dictionary and json
    print '\nWriting nodes to json.'

    def nodemaker(links, extra=[]):
        '''Convert integer 2-keyed links dictionary to nodes list.
        Optional extra (SINGLE) parameter to add.'''
        nodes = [s for (s, t), v in links.items() if v > 0]
        nodes.append(extra)
        nodes = set(nodes)
        return [{"name": n} for n in nodes]

    nodesncv = nodemaker(nonconversion, ncv)
    nodescv = nodemaker(conversion, cv)
    nodes = nodesncv + nodescv

    print 'Writing links to json.'

    def linkmaker(links):
        '''Convert integer 2-keyed links dictionary to links list'''
        return [{
            "source": s,
            "target": t,
            "value": v
        } for (s, t), v in links.items() if v > 0]

    linksncv = linkmaker(nonconversion)
    linkscv = linkmaker(conversion)
    links = linksncv + linkscv

    def writejson(links, nodes, outfile):
        '''lol so much repeating and helper functions'''
        linksandnodes = {"links": links, "nodes": nodes}
        with open(outfile, 'w') as f:
            json.dump(linksandnodes, f)

    #writejson(linksncv, nodesncv, outncv)
    #writejson(linkscv, nodescv, outcv)
    writejson(links, nodes, outfile)
Example 41
import os
import random
import string
import subprocess
import time
import frogress

# clear the screen
os.system('clear')
program = raw_input(
    "Was wollen Sie machen Dateien (e)rstellen oder (l)öschen : ")
# ask for the number of files to create
if program.lower() == ('e'):
    anzahl = input("Wieviiel files sollen erstellt werden :")
    # ask for the extension of the files to create
    endungen = raw_input("Welche Endung soll die Datei haben :")
    # i is just a variable for the iterator
    #for i in range(0, anzahl):
    for i in frogress.bar(range(0, anzahl)):
        ran = random.choice(string.ascii_letters)
        ran1 = random.choice(string.ascii_letters)
        subprocess.call("touch %s%s.%s" % (ran, ran1, endungen), shell=True)
        #open ('%s%s.txt' % (ran,ran1), 'a').close()
        #print ("touch %s%s.txt") % (ran,ran1)
elif program.lower() == ('l'):
    # builds a list of files in the current directory, excluding .py files and directories
    filelist = [
        f for f in os.listdir(".")
        if not f.endswith(".py") and not os.path.isdir(f)
    ]
    for f in filelist:
        #print f
        # delete files
        os.remove(f)
Example 42
def main():
    args = parse_args()

    db_conn = pymysql.connect(**args.mysql_url)

    insert_tpl = '''
    INSERT INTO `mag_papers` (
        `paper_id`,
        `original_paper_title`,
        `normalized_paper_title`,
        `paper_publish_year`,
        `paper_publish_date`,
        `paper_doi`,
        `original_venue_name`,
        `normalized_venue_name`,
        `journal_id_mapped_to_venue_name`,
        `conference_series_id_mapped_to_venue_name`,
        `paper_rank`
    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    '''

    if args.create_tables:
        print('Creating tables and indexes')
        create_tables_and_indexes(db_conn.cursor())
        db_conn.commit()

    print('Reading', args.input_csv, '...')
    input_file = utils.open_compressed_file(args.input_csv)
    cursor = db_conn.cursor()
    with input_file, cursor:
        csvreader = csv.reader(
            input_file,
            delimiter='\t',
            quoting=csv.QUOTE_NONE,
        )
        records = (
            parse_papers_record(r)
            for r in csvreader
        )

        records_truncated = (
            (
                r.paper_id[:50],
                r.original_paper_title[:255],
                r.normalized_paper_title[:255],
                r.paper_publish_year,
                r.paper_publish_date,
                r.paper_doi[:255],
                r.original_venue_name[:255],
                r.normalized_venue_name[:255],
                r.journal_id_mapped_to_venue_name[:255],
                r.converence_series_id_mapped_to_venue_name[:255],
                r.paper_rank,
            ) for r in records
        )

        records_with_progress = frogress.bar(
            records_truncated,
            steps=args.expected_records,
        )
        cursor.executemany(insert_tpl, records_with_progress)
    db_conn.commit()