def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('tanPath',
                        help='file path to tanimoto matrix pickle: "../foo.pickle"')
    parser.add_argument('cosPath',
                        help='file path to cosine matrix pickle: "../foo.pickle"')
    parser.add_argument('outPath',
                        help='file path to merged matrix pickle: "../foo.pickle"')
    parser.add_argument('lmbd', help='between 0 and 1')
    args = parser.parse_args()

    print 'loading tanimoto matrix pickle...'
    f = open(args.tanPath)
    tanMatrix = cPickle.load(f)
    f.close()

    print 'loading cosine matrix pickle...'
    f = open(args.cosPath)
    cosMatrix = cPickle.load(f)
    f.close()

    result = copy(cosMatrix)
    length = len(tanMatrix)
    fish = ProgressFish(total=length)
    for i in range(length):
        result[i] = fishers_chiSquare_method(cosMatrix[i], tanMatrix[i],
                                             float(args.lmbd))
        fish.animate(amount=i)

    print 'pickling to ' + args.outPath
    f = open(args.outPath, 'w')
    cPickle.dump(result, f)
    f.close()
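# A minimal invocation sketch (not part of the original script): assuming this
# main() is exposed via an "if __name__ == '__main__': main()" guard in a file
# named merge_matrices.py (a hypothetical name), it would be run as e.g.
#
#     python merge_matrices.py ../tanimoto.pickle ../cosine.pickle ../merged.pickle 0.5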
def fit(self, x_train, y_train, x_dev=None, y_dev=None, batch_size=100):
    train_fn = theano.function(inputs=[self.batch_x, self.batch_y],
                               outputs=self.cost,
                               updates=self.updates,
                               givens={x: self.batch_x, y: self.batch_y})
    train_set_iterator = DatasetMiniBatchIterator(self.rng, x_train, y_train,
                                                  batch_size=batch_size,
                                                  randomize=True)
    dev_set_iterator = DatasetMiniBatchIterator(self.rng, x_dev, y_dev,
                                                batch_size=batch_size,
                                                randomize=False)
    train_score = self._batch_score(train_set_iterator)
    dev_score = self._batch_score(dev_set_iterator)

    best_dev_error = numpy.inf
    epoch = 0
    timer_train = time.time()
    while epoch < n_epochs:
        avg_costs = []
        timer = time.time()
        fish = ProgressFish(total=len(train_set_iterator))
        for i, (x, y) in enumerate(train_set_iterator, 1):
            fish.animate(amount=i)
            avg_cost = train_fn(x, y)
            if type(avg_cost) == list:
                avg_costs.append(avg_cost[0])
            else:
                avg_costs.append(avg_cost)
        mean_cost = numpy.mean(avg_costs)
        mean_train_error = numpy.mean(train_score())
        dev_error = numpy.mean(dev_score())
        print('epoch {} took {:.4f} seconds; '
              'avg costs: {:.4f}; train error: {:.4f}; '
              'dev error: {:.4f}'.format(epoch, time.time() - timer,
                                         mean_cost, mean_train_error,
                                         dev_error))
        if dev_error < best_dev_error:
            best_dev_error = dev_error
            best_params = [numpy.copy(p.get_value()) for p in params]
        epoch += 1
    print('Training took: {:.4f} seconds'.format(time.time() - timer_train))
    for i, param in enumerate(best_params):
        params[i].set_value(param, borrow=True)
def get_vector_list(refs, refs_base):
    fish = ProgressFish(total=len(refs_base))
    vector_list = ref_to_vector(refs_base[0], refs)  # init
    for i in range(1, len(refs_base)):
        column_vector = ref_to_vector(refs_base[i], refs)
        vector_list = column_stack([vector_list, column_vector])
        fish.animate(amount=i)
    return vector_list
def do_records(records):
    num_records = LIMIT
    fish = ProgressFish(total=num_records)
    for i, record in enumerate(records):
        fish.animate(amount=i)
        keys = record.keys()
        x = get_o_list(record, x_keys)
        y = get_o_list(record, y_keys)
        y_count = len(filter((lambda r: r != "\"\"" and r != "''" and r != ""), y))
        x_count = len(filter((lambda r: r != "\"\"" and r != "''" and r != ""), x))
        if y_count == 0 or x_count == 0:
            continue
        home_dep_details = [""] * 16
        try:
            home_lsoa_code = record['"CEN_LSOA"'][1:-1]
            if home_lsoa_code != None and home_lsoa_code != '':
                home_dep_details = dep_2_mappings.find_one(
                    {'code': home_lsoa_code})['dep']
        except Exception as e:
            print "home - probably wales/scotland"
            print e
        x += home_dep_details
        sd = [""] * 133
        try:
            sd2 = schools_data.find_one(
                {'KS5_11SCHNAME': record["\"SCH_SCHOOLNAME\""][1:-1]}, {'_id': 0})
            if sd2 != None:
                sd = map((lambda k: sd2[k]), [
                    "LURN", "LLA", "LESTAB", "LLAESTAB", "LSCHNAME", "LSTREET",
                    "LLOCALITY", "LADDRESS3", "LTOWN", "LPOSTCODE", "LTELNUM",
                    "LICLOSE", "LISNEW", "LMINORGROUP", "LNFTYPE", "LISPRIMARY",
                    "LISSECONDARY", "LISPOST16", "LAGEL", "LAGEH", "LGENDER",
                    "LSFGENDER", "LRELDENOM", "LADMPOL", "LNEWACFLAG",
                    "KS5_11RECTYPE", "KS5_11ALPHAIND", "KS5_11REGION",
                    "KS5_11LASORT", "KS5_11LEA", "KS5_11ESTAB", "KS5_11URN",
                    "KS5_11SCHNAME_AC", "KS5_11SCHNAME", "KS5_11ADDRESS1",
                    "KS5_11ADDRESS2", "KS5_11ADDRESS3", "KS5_11TOWN",
                    "KS5_11PCODE", "KS5_11TELNUM", "KS5_11CONTFLAG",
                    "KS5_11NFTYPE", "KS5_11RELDENOM", "KS5_11ADMPOL",
                    "KS5_11GENDER1618", "KS5_11FEEDER", "KS5_11AGERANGE",
                    "KS5_11ICLOSE", "KS5_11TABKS2", "KS5_11TAB15",
                    "KS5_11EXAMCONF", "KS5_11DUMMY1", "KS5_11TPUP1618",
                    "KS5_11TALLPUPA", "KS5_11TALLPPSA", "KS5_11TALLPPEA",
                    "KS5_11PTPASS1L3", "KS5_11PTPASS2LV3", "KS5_11PTPASS3LV3",
                    "KS5_11TALLPPS08", "KS5_11TALLPPS09", "KS5_11TALLPPS10",
                    "KS5_11TALLPPE08", "KS5_11TALLPPE09", "KS5_11TALLPPE10",
                    "ABS_11LA", "ABS_11ESTAB", "ABS_11URN", "ABS_11PERCTOT",
                    "ABS_11PERCUA", "ABS_11PPERSABS15", "ABS_11PPERSABS20",
                    "CFR_11URN", "CFR_11LANUMBER", "CFR_11LONDON/NON-LONDON",
                    "CFR_11MEDIAN", "CFR_11PUPILS", "CFR_11FSM", "CFR_11FSMBAND",
                    "CFR_11GRANTFUNDING", "CFR_11SELFGENINCOME",
                    "CFR_11TOTALINCOME", "CFR_11TEACHINGSTAFF",
                    "CFR_11SUPPLYTEACHERS", "CFR_11EDUCATIONSUPPORTSTAFF",
                    "CFR_11PREMISES", "CFR_11BACKOFFICE", "CFR_11CATERING",
                    "CFR_11OTHERSTAFF", "CFR_11ENERGY",
                    "CFR_11LEARNINGRESOURCES", "CFR_11ICT", "CFR_11BOUGHTIN",
                    "CFR_11OTHER", "CFR_11TOTALEXPENDITURE", "SWF_11LA",
                    "SWF_11URN", "SWF_11NTEA", "SWF_11NTEAAS", "SWF_11NNONTEA",
                    "SWF_11NFTETEA", "SWF_11NFTETEAAS", "SWF_11RATPUPTEA",
                    "SWF_11SALARY", "CENSUS_11URN", "CENSUS_11LAESTAB",
                    "CENSUS_11NUMFTE", "CENSUS_11TOTPUPSENDN",
                    "CENSUS_11TSENSAP", "CENSUS_11TSENA", "CENSUS_11TOTSENST",
                    "CENSUS_11TOTSENAP", "CENSUS_11PSENSAP", "CENSUS_11PSENA",
                    "CENSUS_11PTOTSENST", "CENSUS_11PTOTSENAP",
                    "CENSUS_11TOTPUPEALDN", "CENSUS_11NUMEAL",
                    "CENSUS_11NUMENGFL", "CENSUS_11NUMUNCFL",
                    "CENSUS_11PNUMEAL", "CENSUS_11PNUMENGFL",
                    "CENSUS_11PNUMUNCFL", "CENSUS_11TOTPUPFSMDN",
                    "CENSUS_11NUMFSM", "CENSUS_11NUMNOFSM", "CENSUS_11PNUMFSM",
                    "CENSUS_11PNUMNOFSM", "OLA", "OURN", "OSCHOOLNAME",
                    "OPHASE", "OREPORTURL"])
        except Exception as e:
            print "school details"
            print e
        x += sd
        school_dep_details = [""] * 16
        try:
            school_postcode = record["\"SCH_POSTCODE\""][1:-1]
            school_lsoa_code = postcodes.find_one({'Postcode2': school_postcode},
                                                  {'Code': 1})
            if school_lsoa_code != None:
                school_lsoa_code = school_lsoa_code['Code']
                school_dep_details = dep_2_mappings.find_one(
                    {'code': school_lsoa_code})['dep']
        except Exception as e:
            print "school deps"
            print e
        x += school_dep_details
        coll.insert({'x': x, 'y': y})
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('in_abs',
                        help='input abstracts file path: "../*_abstracts.pickle"')
    parser.add_argument('out_stemmed_abs',
                        default='../stemmed_abstracts.pickle',
                        help='file path of abstracts output file: "stemmed_abstracts.pickle"')
    parser.add_argument('out_words',
                        default='../word_base.pickle',
                        help='file path of words output file: "word_base.pickle"')
    args = parser.parse_args()

    print "loading abstracts..."
    abstracts_file = open(args.in_abs)
    abstracts = cPickle.load(abstracts_file)
    abstracts_file.close()

    words = []
    stemmed_abstracts = {}
    fish = ProgressFish(total=len(abstracts))
    cnt = 0
    print "reading all words..."
    for (key, abstract) in abstracts.items():
        sentence = wordpunct_tokenize(abstract.lower())
        new_sentence = []
        for word in sentence:
            if word.isalnum():
                stemmed_word = stem(word)
                words.append(stemmed_word)
                new_sentence.append(stemmed_word)
        stemmed_abstracts[key] = list(set(new_sentence))
        cnt += 1
        fish.animate(amount=cnt)

    print "removing duplicates"
    words = set(words)

    print "persisting word_base"
    word_base = open(args.out_words, 'w')
    cPickle.dump(words, word_base)
    word_base.close()

    print "persisting abstracts"
    stemmed_abstracts_file = open(args.out_stemmed_abs, 'w')
    cPickle.dump(stemmed_abstracts, stemmed_abstracts_file)
    stemmed_abstracts_file.close()
def audit_scores(ss_id=None):
    """ recalculates all scores and displays the changes """
    reload(sys)
    sys.setdefaultencoding('utf8')

    if not ss_id:
        print "iterating all submission sets"
        cs = CreditSet.objects.get(pk=6)
        qs = SubmissionSet.objects.filter(status='r').filter(creditset=cs)
    else:
        print "auditing SS: %s" % ss_id
        qs = [SubmissionSet.objects.get(pk=ss_id)]

    display_table = []
    fish = ProgressFish(total=len(qs))
    count = 0
    for ss in qs:
        count += 1
        fish.animate(amount=count)

        # current_score = get_score_object(ss)
        # recalculate_all_scores(ss)
        # recalculated_score = get_score_object(ss)

        # compare scores
        s1 = get_score_obj(ss, credits=False)
        ss.get_STARS_score(recalculate=True)
        s2 = get_score_obj(ss, credits=False)
        compare_score_objects(s1, s2, display_table)

        # current_score = round(ss.score, 2)
        # recalculated_score = round(ss.get_STARS_score(recalculate=True), 2)
        #
        # if abs(current_score - recalculated_score) > .1:
        #     display_table.append([
        #         ss, current_score, recalculated_score,
        #         current_score - recalculated_score, ss.date_submitted, ss.id])

    # if display_table:
    print tabulate(display_table, headers=[
        'submission set', 'name', 'id', 'calculated_score',
        'recalculated_score', 'delta'])
def abstracts_to_vector(abstracts, word_base):
    print 'converting abstracts...'
    cnt = 0
    word_base_dict = {}
    for word in word_base:
        word_base_dict[word] = cnt
        cnt += 1
    cnt = 0
    fish = ProgressFish(total=len(abstracts))
    for key, abstract in abstracts.items():
        vector_abstract = abstract_to_vector(abstract, word_base_dict)
        abstracts[key] = vector_abstract
        cnt += 1
        fish.animate(amount=cnt)
    return abstracts
def convert_to_list(abstracts, references):
    no_of_docs = len(abstracts)
    if len(references) != no_of_docs:
        print 'abstracts and refs must have same size!'
    abs_list = list()
    refs_list = list()
    keys_list = list()
    print len(references)
    cnt = 1
    fish = ProgressFish(total=len(abstracts))
    for key in references.keys():
        fish.animate(amount=cnt)
        cnt += 1
        abs_list.append(abstracts[key])
        refs_list.append(references[key])
        keys_list.append(key)
    return abs_list, refs_list, keys_list
def download_url(url, save_as):
    r = requests.get(url, cookies={'_metacasts_session': SESSION_COOKIE},
                     stream=True)
    file_size = int(r.headers['content-length'])
    mime_type = r.headers['content-type']
    downloaded_bytes = 0
    file_name = save_as + guess_extension(mime_type)
    file_mode = 'wb'

    if os.path.exists(file_name):
        existing_size = os.path.getsize(file_name)
        print 'size is {0} vs {1}'.format(existing_size, file_size)
        if os.path.getsize(file_name) == file_size:
            print file_name + " already exists. Skipping..."
            return
        else:
            print "File incomplete. Resuming..."
            file_mode = 'ab'
            r = requests.get(url,
                             cookies={'_metacasts_session': SESSION_COOKIE},
                             headers={'Range': 'bytes={0}-'.format(existing_size)},
                             stream=True)
            downloaded_bytes = existing_size
            if int(r.headers['content-length']) != file_size - existing_size:
                print "File size mismatch. Reset download."
                os.remove(file_name)
                file_mode = 'wb'
                downloaded_bytes = 0

    with open(file_name, file_mode) as f:
        print "Downloading {0}...".format(file_name)
        pf = ProgressFish(total=file_size)
        for index, chunk in enumerate(r.iter_content(chunk_size=128 * 1024)):
            if chunk:
                downloaded_bytes += len(chunk)
                f.write(chunk)
                f.flush()
                pf.animate(amount=downloaded_bytes)
    print "{0} finished download".format(file_name)
def random_sampling(abstracts, references, no_of_entries):
    length = check_lengths(abstracts, references)
    percentage = float(no_of_entries) / length * 100
    print 'reduce ' + str(length) + ' to ' + str(no_of_entries)
    print "that's about " + str(percentage) + "% of the original size"
    fish = ProgressFish(total=int(no_of_entries))
    key_list = abstracts.keys()
    random.shuffle(key_list)
    new_abs = {}
    new_refs = {}
    for i in range(int(no_of_entries)):
        fish.animate(amount=i)
        choice = key_list.pop()
        new_abs[choice] = abstracts[choice]
        new_refs[choice] = references[choice]
    return new_abs, new_refs
def fetch(urls):
    now = get_datetime(gmtime())
    with futures.ThreadPoolExecutor(max_workers=5) as executor:
        future_to_url = {executor.submit(feedparser.parse, url): url
                         for url in urls}
        fish = ProgressFish(total=len(urls))
        i = 0
        for future in futures.as_completed(future_to_url):
            url = future_to_url[future]
            if future.exception() is not None:
                log('Error reading %r: %s' % (url, future.exception()))
                continue
            feed = future.result()
            _feed, created = Feed.objects.get_or_create(url=url)
            for entry in feed.entries:
                try:
                    pub_time = get_datetime(entry.date_parsed)
                except AttributeError:
                    log('No date_parsed attribute on entry')
                    continue
                except:
                    log('Error reading entry date')
                    continue
                try:
                    assert pub_time - now < TIME_EPSILON
                except AssertionError:
                    print >> sys.stderr, 'Entry is from future? %s %s' % (pub_time, url)
                _entry, created = Entry.objects.get_or_create(feed=_feed,
                                                              pub_time=pub_time)
                for item in get_items(entry):
                    Item.objects.get_or_create(value=item, entry=_entry)
            fish.animate(amount=i)
            i += 1
def main():
    source = open("../dense_abstracts.pickle")
    out_file = open('../english_abstracts.pickle', 'w')
    out_diff = open('../english_abstracts_diff.pickle', 'w')
    print str(datetime.now()) + " filterNonEnglishAbstracts.py - deletes entries with non-English abstracts"
    if not source:
        print "This works.... NOT"
        return -1

    deleted_abstract_ids = []
    print "reading abstracts..."
    abstracts = cPickle.load(source)
    abs_tobeginwith = float(len(abstracts))

    print "deleting non-English ones"
    empty_cnt = 0
    fish = ProgressFish(total=len(abstracts))
    count = 0
    for article_id, abstract in abstracts.items():
        count += 1
        fish.animate(amount=count)
        if 'en' != guessLanguage(abstract):
            empty_cnt += 1
            deleted_abstract_ids.append(article_id)
            del abstracts[article_id]

    print str(datetime.now()) + ' starting to persist references to: ' + out_file.name + ' and ' + out_diff.name
    print "deleted " + str(empty_cnt) + " documents"
    print "that's " + str(empty_cnt / abs_tobeginwith) + "%"
    cPickle.dump(abstracts, out_file, -1)
    cPickle.dump(deleted_abstract_ids, out_diff, -1)
    source.close()
    out_file.close()
    out_diff.close()
def most_refs_sampling(abstracts, references, no_of_entries):
    no_of_entries = int(no_of_entries)
    length = check_lengths(abstracts, references)
    percentage = float(no_of_entries) / length * 100
    print 'reduce ' + str(length) + ' to ' + str(no_of_entries)
    print "that's about " + str(percentage) + "% of the original size"
    fish = ProgressFish(total=int(no_of_entries))
    new_abs = {}
    new_refs = {}
    count_refs = {}
    for key, refs in references.items():
        count_refs[key] = len(refs)
    refs_sorted_by_count = sorted(count_refs.iteritems(),
                                  key=operator.itemgetter(1))
    for i in range(int(no_of_entries)):
        fish.animate(amount=i)
        (choice, count) = refs_sorted_by_count.pop()
        new_abs[choice] = abstracts[choice]
        new_refs[choice] = references[choice]
    return new_abs, new_refs
def wait_for_completed_transfer(mountpoint, timeout_in_s=None):
    print "waiting for completed upload"
    if timeout_in_s is not None:
        print "waiting at most %s min" % (timeout_in_s / 60)
    else:
        timeout_in_s = float("inf")
    CLOUDFUSION_NOT_UPLOADED_PATH = mountpoint + "/stats/notuploaded"
    time_waited = 0
    if os.path.exists(CLOUDFUSION_NOT_UPLOADED_PATH):
        if timeout_in_s == float("inf"):
            fsh = ProgressFish(total=10000000000)
        else:
            fsh = ProgressFish(total=timeout_in_s)
        while os.path.getsize(CLOUDFUSION_NOT_UPLOADED_PATH) > 0:
            sleep(10)
            time_waited += 10
            fsh.animate(amount=time_waited)
            if time_waited > timeout_in_s:
                break
        return
    print ""
    start = time.time()

    def no_network_activity(line):
        try:
            kbit_per_5min = sum(map(int, line.split()))
            if kbit_per_5min < 200:
                return True
        except ValueError:
            pass
        if start + timeout_in_s < time.time():
            return True
        return False

    # feed ifstat's output lines to the no_network_activity callback
    p = ifstat('-bzn', '600', _out=no_network_activity)
    p.wait()
    p.kill()
def handle(self, *args, **options):
    conn = boto.connect_s3()
    src = conn.get_bucket('aashe-hub-dev')
    dst = conn.get_bucket('aashe-hub-production')
    dst_keys = [k.key for k in dst.list()]

    print "Copying all Files..."
    file_qs = File.objects.filter(item__isnull=False)
    fish = ProgressFish(total=file_qs.count())
    count = 0
    for f in file_qs:
        count += 1
        fish.animate(amount=count)
        key = urlparse(f.item).path[1:]
        # if it doesn't already exist:
        if key not in dst_keys:
            dst.copy_key(key, src.name, key)

    print "Copying all Images..."
    image_qs = Image.objects.filter(image__isnull=False)
    fish2 = ProgressFish(total=image_qs.count())
    count = 0
    for i in image_qs:
        count += 1
        fish2.animate(amount=count)
        key = urlparse(i.image).path[1:]
        # if it doesn't already exist:
        if key not in dst_keys:
            try:
                dst.copy_key(key, src.name, key)
            except boto.exception.S3ResponseError:
                print "**** failed to copy: %s" % key
    print
def progress(iterable, **kwargs):
    fish = ProgressFish(**kwargs)
    for i, item in enumerate(iterable):
        yield item
        fish.animate(amount=i)
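# A minimal usage sketch for the progress() wrapper above, assuming ProgressFish
# accepts the total= keyword used elsewhere in these snippets; process_item and
# the item count are hypothetical:
#
#     for item in progress(range(100), total=100):
#         process_item(item)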
def main(config_path, desc_path, target_path):
    massaged = io.StringIO()
    with io.open(config_path, 'rU') as infile:
        massaged.writelines(line.lstrip() for line in infile)
    massaged.seek(0)
    config = RawConfigParser()
    config.readfp(massaged)
    lfs_url = config.get('lfs', 'url').strip('"')
    api_url = posixpath.join(lfs_url, 'objects', 'batch')

    with io.open(desc_path, 'rU') as infile:
        target = dict(line.strip().partition(' ')[::2] for line in infile)
    if target.get('version') != 'https://git-lfs.github.com/spec/v1':
        raise ValueError("can't handle lfs", target['version'])
    oid_type, sep, oid = target['oid'].partition(':')
    if oid_type != 'sha256':
        raise ValueError("can't handle oid", target['oid'])
    size = int(target['size'])

    sys.stderr.write('Fetching {!r} from lfs...\n'.format(
        os.path.basename(target_path)))
    try:
        infile = open(target_path, 'rb')
    except IOError as e:
        if e.errno != errno.ENOENT:
            raise
    else:
        if file_matches(infile, size, oid):
            sys.stderr.write('Lucky! It was already up to date.\n')
            return

    req = Request(api_url, json.dumps({
        'operation': 'download',
        'objects': [{
            'oid': oid,
            'size': size,
        }],
    }).encode(), {
        'Accept': JSON_TYPE,
        'Content-Type': JSON_TYPE,
    })
    with contextlib.closing(urlopen(req)) as respfile:
        if WRAP_RESPFILE:
            respfile = io.TextIOWrapper(respfile)
        resp = json.load(respfile)
    url = next(obj['actions']['download']['href']
               for obj in resp['objects'] if obj['oid'] == oid)

    with contextlib.closing(urlopen(url)) as respfile:
        hasher = hashlib.sha256()
        with tempfile.NamedTemporaryFile(
                dir=os.path.dirname(target_path)) as outfile:
            fish = ProgressFish(total=size)
            fetched = 0
            for chunk in iter(lambda: respfile.read(8192), b''):
                fetched += len(chunk)
                fish.animate(amount=fetched)
                hasher.update(chunk)
                outfile.write(chunk)
            if hasher.hexdigest() != oid:
                raise ValueError('hash failure', hasher.hexdigest(), oid)
            os.rename(outfile.name, target_path)
            # recreate the temp file so NamedTemporaryFile's cleanup can still unlink it
            open(outfile.name, 'w').close()
        else:
            chapter_data = {'name': c.name, 'description': c.description}
            chapters_to_check.append(chapter_data)
    else:
        pass

sites_with_links_back = []
sites_with_no_links_back = []
fish = ProgressFish(total=len(chapters_with_web_sites))
for i, c in enumerate(chapters_with_web_sites):
    req = c['urls'][0]
    try:
        r = br.open(req)
        fish.animate(amount=i)
    except urllib2.HTTPError, e:
        print e.code
        continue
    c['urls'] = r.geturl()
    doc = r.read()
    soup = BeautifulSoup.BeautifulSoup(doc)
    try:
        href = soup.findAll('a', {'href': re.compile('thezeitgeistmovement.com')})
        if href:
            sites_with_links_back.append(c)
        else:
            sites_with_no_links_back.append(c)
    except:
        continue
def analyze_long_pulse_data_file(filepath, save=0, plot_steps=0, new=1,
                                 starttime=0, endtime=0):
    """ analyzes timeseries of a pulse fish EOD recording """
    # Script to detect and classify EODs in recordings of weakly electric pulse
    # fish, Dexter Früh, 2018
    #
    # results will be saved in workingdirectory/recording/
    #
    # input:
    #   - [Recorded Timeseries] recording.WAV
    # outputs (optional):
    #   - [Detected and Classified EODs]
    #     (Numpy Array with Shape (Number of EODs, 4 (Attributes of EODs)),
    #     with the EOD-Attributes
    #       - x-location of the EOD
    #         (time/x-coordinate/datapoint in recording)
    #       - y-location of the EOD
    #         (Amplitude of the positive peak of the pulse-EOD)
    #       - height of the EOD (largest distance between peak and trough in the EOD)
    #       - class of the EOD
    #     eods_recording.npy
    #   - [plots of the results of each analysis step for each
    #     analysis part (time interval of length = deltat) of the recording]
    #
    # required command line arguments at function call
    #   - save : if True, save the results to a numpy file (possibly
    #            overwrite existing)
    #   - plot : if True, plot results in each analysis step
    #   - new  : if True, do a new analysis of the recording, even if there
    #            is an existing analyzed .npy file with the right name.

    import sys
    import numpy as np
    import copy
    from scipy.stats import gmean
    from scipy import stats
    from scipy import signal
    from scipy import optimize
    import matplotlib
    from fish import ProgressFish
    import matplotlib.pyplot as plt
    from thunderfish.dataloader import open_data
    from thunderfish.peakdetection import detect_peaks
    from scipy.interpolate import interp1d
    from scipy.signal import savgol_filter
    from collections import deque
    import ntpath
    import nixio as nix
    import time
    import os
    from shutil import copy2
    from ownDataStructures import Peak, Tr, Peaklist
    import DextersThunderfishAddition as dta
    from IPython import embed

    # parameters for the analysis
    deltat = 30.0  # seconds of buffer size
    thresh = 0.04  # minimal threshold for peakdetection
    peakwidth = 20  # width of a peak and minimal distance between two EODs
    # basic parameters for thunderfish.dataloader.open_data
    verbose = 0
    channel = 0
    ultimate_threshold = thresh + 0.01
    startblock = 0
    # timeinterval to analyze other than the whole recording
    #starttime = 0
    #endtime = 0
    #timegiven = 0
    home = os.path.expanduser('~')
    os.chdir(home)
    new = int(sys.argv[4])
    save = int(sys.argv[2])
    plot = int(sys.argv[3])
    starttime = int(starttime)
    endtime = int(endtime)
    timegiven = False
    if endtime > starttime >= 0:
        timegiven = True
    peaks = np.array([])
    troughs = np.array([])
    filename = path_leaf(filepath)
    datasavepath = filename[:-4]
    proceed = input(
        'Currently operates in home directory. If given a pulsefish recording '
        'filename.WAV, then a folder filename/ will be created in the home '
        'directory and all relevant files will be stored there. continue? [y/n] '
    ).lower()
    if proceed != 'y':
        quit()
    if not os.path.exists(datasavepath):
        os.makedirs(datasavepath)
    if save == 1:
        print('files will be saved to: ', datasavepath)
    eods_len = 0

    # starting analysis
    if new == 1 or not os.path.exists(filename[:-4] + "/eods5_" + filename[:-3] + "npy"):
        if filepath != home + '/' + datasavepath + '/' + filename:
            print(filepath, datasavepath + '/' + filename)
            proceed = input('Copy datafile to ' + datasavepath +
                            ' where all the other files will be stored? [y/n] ').lower()
            if proceed == 'y':
                copy2(filepath, datasavepath)
        # import data
        with open_data(filepath, channel, deltat, 0.0, verbose) as data:
            samplerate = data.samplerate
            nblock = int(deltat * data.samplerate)
            # selected time interval
            if timegiven == True:
                parttime1 = starttime * samplerate
                parttime2 = endtime * samplerate
                data = data[parttime1:parttime2]
            # split data into blocks
            if len(data) % nblock != 0:
                blockamount = len(data) // nblock + 1
            else:
                blockamount = len(data) // nblock
            # progress bar
            print('blockamount: ', blockamount)
            progress = 0
            print(progress, '%', flush=True, end=" ")
            fish = ProgressFish(total=blockamount)
            # blockwise analysis
            for idx in range(0, blockamount):
                blockdata = data[idx * nblock:(idx + 1) * nblock]
                # progressbar
                if progress < (idx * 100 // blockamount):
                    progress = (idx * 100) // blockamount
                progressstr = ' Filestatus: '
                fish.animate(amount=idx, dexextra=progressstr)
                # ---analysis------------------------------------------------
                # step 1: detect peaks in timeseries
                pk, tr = detect_peaks(blockdata, thresh)
                troughs = tr
                # continue with analysis only if multiple peaks are detected
                if len(pk) > 3:
                    peaks = dta.makeeventlist(pk, tr, blockdata, peakwidth)
                    #dta.plot_events_on_data(peaks, blockdata)
                    peakindices, peakx, peakh = dta.discardnearbyevents(
                        peaks[0], peaks[1], peakwidth)
                    peaks = peaks[:, peakindices]
                    if len(peaks) > 0:
                        # used to connect the results of the current block with the previous
                        if idx > startblock:
                            peaklist = dta.connect_blocks(peaklist)
                        else:
                            peaklist = Peaklist([])
                        aligned_snips = dta.cut_snippets(blockdata, peaks[0], 15,
                                                         int_met="cubic",
                                                         int_fact=10,
                                                         max_offset=1.5)
                        pcs = dta.pc(aligned_snips)  #pc_refactor(aligned_snips)
                        order = 5
                        minpeaks = 3 if deltat < 2 else 10
                        labels = dta.cluster_events(pcs, peaks, order, 0.4,
                                                    minpeaks, False,
                                                    method='DBSCAN')
                        peaks = np.append(peaks, [labels], axis=0)
                        #dta.plot_events_on_data(peaks, blockdata)
                        num = 1
                        if idx > startblock:
                            dta.alignclusterlabels(labels, peaklist, peaks,
                                                   data=blockdata)
                        peaks, peaklist = dta.ampwalkclassify3_refactor(
                            peaks, peaklist)  # classification by amplitude
                        minlen = 6  # >= 1
                        peaks = dta.discard_short_classes(peaks, minlen)
                        if len(peaks[0]) > 0:
                            peaks = dta.discard_wave_pulses(peaks, blockdata)
                        # plots the data part and its detected and classified peaks
                        if plot_steps == True:
                            dta.plot_events_on_data(peaks, blockdata)
                            pass
                        worldpeaks = np.copy(peaks)
                        # change peaks location in the buffered part to the location relative to the whole recording
                        peaklist.len = nblock
                        # peaklocations relative to whole recording
                        worldpeaks[0] = worldpeaks[0] + (idx * nblock)
                        thisblock_eods = np.delete(peaks, 3, 0)
                        # save the peaks of the current buffered part to a numpy-memmap on the disk
                        mmpname = "eods_" + filename[:-3] + "npmmp"
                        save_EOD_events_to_npmmp(thisblock_eods, eods_len,
                                                 idx == startblock,
                                                 datasavepath, mmpname)
                        eods_len += len(thisblock_eods[0])

        # after the last buffered part has finished, save the memory mapped
        # numpy file of the detected and classified EODs to a .npy file to the
        # disk
        eods = np.memmap(datasavepath + "/eods_" + filename[:-3] + "npmmp",
                         dtype='float64', mode='r+',
                         shape=(4, eods_len), order='F')
        if save == 1:
            path = datasavepath + "/"
            if not os.path.exists(path):
                os.makedirs(path)
            if eods_len > 0:
                print('Saved!')
                np.save(datasavepath + "/eods8_" + datasavepath + "npy", eods)
            else:
                #np.save(filename[:-4]+"/eods5_"+filename[:-3]+"npy", thisblock_eods)
                print('not saved')
    else:
        # if there already has been a certain existing result file and 'new' was set to False
        print('already analyzed')

    print('returns analyzed EODs. Calculate frequencies using all of these but '
          'discard the data from the EODs within the lowest few percent of amplitude')
    return eods
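# A hedged invocation sketch for analyze_long_pulse_data_file: note that the
# function also reads save/plot/new from sys.argv[2:5], so a hypothetical
# command line and call might look like
#
#     python analyze_long_pulse.py recording.WAV 1 0 1
#     # inside the script: analyze_long_pulse_data_file('recording.WAV', save=1, plot_steps=0, new=1)
#
# where the script name and recording name are made up for illustration.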
            'label_mm': mm_label,
        }
        pyithu_list.append(pyithu_json)
    post['pyithu'] = pyithu_list

    amyotha_list = []
    for amyotha_id in amyotha_ids:
        r = requests.get(base_url + '/en/posts/' + amyotha_id)
        en_label = r.json()['result']['label']
        r = requests.get(base_url + '/my/posts/' + amyotha_id)
        mm_label = r.json()['result']['label']
        amyotha_json = {
            'popit_id': amyotha_id,
            'label_en': en_label,
            'label_mm': mm_label,
        }
        amyotha_list.append(amyotha_json)
    post['amyotha'] = amyotha_list
    fish.animate(amount=progress)

json_out = json.dumps(posts, indent=4, sort_keys=True, ensure_ascii=False,
                      encoding='utf8')
with io.open('townships.json', 'w', encoding='utf8') as json_file:
    json_file.write(unicode(json_out))
    session = manager.mgr.getSessionObject(vbox)
    machine.lockMachine(session, 1)
    console = session.console
    usb_uuid = searchUSBBySerial(serial)
    console.attachUSBDevice(usb_uuid)
    manager.closeMachineSession(session)


def machineIpAddress(machine):
    return machine.getGuestProperty('/VirtualBox/GuestInfo/Net/0/V4/IP')[0]


if __name__ == '__main__':
    machine_uuid = str(uuid.uuid4())
    flags = 'UUID={0}'.format(machine_uuid)
    #machine = createMachine(None, machine_uuid, [], 'Debian', flags)
    machine = createMachine(machine_uuid, uuid=machine_uuid,
                            settings_file=None, groups=[],
                            os_type_id='Debian', flags=flags,
                            force_overwrite=False)
    fromMachine = searchMachine('987ae866-a4c0-4723-896e-8897fe17f3f0')
    progress = cloneMachine(fromMachine, machine, 1, [])
    while progress.operationPercent < 100:
        fish.animate(amount=progress.operationPercent)
    vbox.registerMachine(machine)
    progress, session = launchMachine(machine)
    while progress.operationPercent < 100:
        fish.animate(amount=progress.operationPercent)
    manager.closeMachineSession(session)
    time.sleep(60)
    attachUSBBySerial(machine, '8A000080Q')
    print machineIpAddress(machine)
        if type(in_map[k]) == int or type(in_map[k]) == float:
            in_dict[0].update({k: in_map[k]})
        if type(in_map[k]) == unicode:
            in_dict[0].update({k: in_map[k].encode('utf-8')})
        # if in_map.has_key('kind'):
        #     pdb.set_trace()
        if type(in_map[k]) == list:
            handle_lists(in_map[k], in_dict, k)


result = []
fish = ProgressFish(total=len(movie_ids))
for index, i in enumerate(movie_ids):
    # if index % 100 == 0:
    #     print "On movie number: ", index
    fish.animate(amount=index)
    m = im.get_movie(i)
    maps = {}
    for keys in m.iterkeys():
        # print keys, m[keys]
        maps.update({keys: m[keys]})
    dicts = [{}]
    if maps != {}:  ## To ensure that maps are filled, otherwise dicts is not defined!
        makerows(maps, dicts)
        # print dicts
        result.append(df1.append(dicts))
# print result
x = pandas.concat(result)
x.to_csv("results_5k.csv", encoding="utf-8")
ks51 = db.ks5_0910
ks52 = db.ks5_1011
mappings = db.postcode_lsoa
mappings.drop()


def get_p(di):
    return set(map((lambda r: r['"SCH_POSTCODE"']), di))


records = list(get_p(ks51.find({}, {'"SCH_POSTCODE"': 1}).limit(LIMIT)).intersection(
    get_p(ks52.find({}, {'"SCH_POSTCODE"': 1}).limit(LIMIT))))
postcodes = map((lambda r: r[1:-1].replace(' ', '')), records)
print len(postcodes)

i = 0
fish = ProgressFish(total=len(postcodes))
for postcode in postcodes:
    time.sleep(0.1)
    fish.animate(amount=i)
    try:
        i += 1
        url = "http://mapit.mysociety.org/postcode/%s" % postcode
        result = simplejson.load(urllib.urlopen(url))
        lsoa_code = filter((lambda area: area['type_name'] ==
                            "Lower Layer Super Output Area (Full)"),
                           result['areas'].values())[0]['name']
        mappings.insert({'lsoa_code': lsoa_code, 'postcode': postcode})
    except Exception as e:
        print "NOO"

mappings.create_index([("postcode", pymongo.ASCENDING)])
def analyze_pulse_data(filepath, absolutepath=True, deltat=30, thresh=0.04,
                       starttime=0, endtime=0, savepath=False, save=False,
                       npmmp=False, plot_steps=False, plot_result=False):
    '''
    analyzes timeseries of a pulse fish EOD recording

    Parameters
    ----------
    filepath: WAV-file with the recorded timeseries

    deltat: int, optional
        time for a single analysis block (recommended less than a minute, due to
        principal component clustering on the EOD-waveforms)

    thresh: float, optional
        minimum threshold for the peakdetection (if computing frequencies, recommended
        a tiny bit lower than the wished threshold; instead discard the EODs below the
        wished threshold after computing the frequencies for each EOD)

    starttime: int or str of int, optional
        time into the data from where to start the analysis, in seconds

    endtime: int or str of int, optional
        time into the data where to end the analysis, in seconds, larger than starttime

    savepath: Boolean or str, optional
        path to where to save results and intermediate results,
        only needed if save or npmmp is True:
        a string specifying a relative path to the directory where results and
        intermediate results will be saved,
        or False to use the preset savepath, which is ~/filepath/,
        or True to specify the savepath as input while the script is running

    save: Boolean, optional
        True to save the results into a npy file at the savepath

    npmmp: Boolean, optional
        True to save intermediate results into a npmmp at the savepath,
        only recommended in case of memory overflow

    plot_steps: Boolean, optional
        True to plot the results of each analysis block

    plot_result: Boolean, optional
        True to plot the results of the final analysis. Not recommended for long
        recordings due to %TODO

    Returns
    -------
    eods: numpy array
        2D numpy array. first axis: attributes of an EOD
        (x (datapoints), y (recorded voltage), height (difference from maximum
        to minimum), class), second axis: EODs in chronological order.
    '''
    import sys
    import numpy as np
    import copy
    from scipy.stats import gmean
    from scipy import stats
    from scipy import signal
    from scipy import optimize
    import matplotlib
    from fish import ProgressFish
    import matplotlib.pyplot as plt
    from thunderfish.dataloader import open_data
    from thunderfish.peakdetection import detect_peaks
    from scipy.interpolate import interp1d
    from scipy.signal import savgol_filter
    from collections import deque
    import ntpath
    import nixio as nix
    import time
    import os
    from shutil import copy2
    from ownDataStructures import Peak, Tr, Peaklist
    import DextersThunderfishAddition as dta
    from IPython import embed

    # parameters for the analysis
    thresh = 0.04  # minimal threshold for peakdetection
    peakwidth = 20  # width of a peak and minimal distance between two EODs
    # basic parameters for thunderfish.dataloader.open_data
    verbose = 0
    channel = 0
    ultimate_threshold = thresh + 0.01
    startblock = 0
    # timeinterval to analyze other than the whole recording
    #starttime = 0
    #endtime = 0
    #timegiven = 0
    home = os.path.expanduser('~')
    if absolutepath:
        filepath = home + '/' + filepath
    #os.chdir(home)
    #save = int(save)
    #plot_steps = int(plot_steps)
    starttime = int(starttime)
    endtime = int(endtime)
    timegiven = False
    if endtime > starttime >= 0:
        timegiven = True
    peaks = np.array([])
    troughs = np.array([])
    filename = path_leaf(filepath)
    eods_len = 0
    if savepath == False:
        datasavepath = home + '/' + filename[:-4]
    elif savepath == True:
        datasavepath = input(
            'With the option npmmp enabled, a numpy memmap will be saved to: ').lower()
    else:
        datasavepath = savepath

    if save and (os.path.exists(datasavepath + "/eods8_" + filename[:-3] + "npy")
                 or os.path.exists(datasavepath + "/eods5_" + filename[:-3] + "npy")):
        print('there already exists an analyzed file, aborting. Change the code if you don\'t want to abort')
        quit()
    if npmmp:
        #proceed = input('With the option npmmp enabled, a numpy memmap will be saved to ' + datasavepath + '. continue? [y/n] ').lower()
        proceed = 'y'
        if proceed != 'y':
            quit()

    # starting analysis
    with open_data(filepath, channel, deltat, 0.0, verbose) as data:
        samplerate = data.samplerate
        # selected time interval
        if timegiven == True:
            parttime1 = starttime * samplerate
            parttime2 = endtime * samplerate
            data = data[parttime1:parttime2]
        # split data into blocks
        nblock = int(deltat * samplerate)
        if len(data) % nblock != 0:
            blockamount = len(data) // nblock + 1
        else:
            blockamount = len(data) // nblock
        print('blockamount: ', blockamount)
        progress = 0
        print(progress, '%', flush=True, end=" ")
        fish = ProgressFish(total=blockamount)
        for idx in range(0, blockamount):
            blockdata = data[idx * nblock:(idx + 1) * nblock]
            if progress < (idx * 100 // blockamount):
                progress = (idx * 100) // blockamount
            progressstr = ' Filestatus: '
            fish.animate(amount=idx, dexextra=progressstr)
            pk, tr = detect_peaks(blockdata, thresh)
            troughs = tr
            if len(pk) > 3:
                peaks = dta.makeeventlist(pk, tr, blockdata, peakwidth)
                peakindices, peakx, peakh = dta.discardnearbyevents(
                    peaks[0], peaks[1], peakwidth)
                peaks = peaks[:, peakindices]
                if len(peaks) > 0:
                    if idx > startblock:
                        peaklist = dta.connect_blocks(peaklist)
                    else:
                        peaklist = Peaklist([])
                    aligned_snips = dta.cut_snippets(blockdata, peaks[0], 15,
                                                     int_met="cubic",
                                                     int_fact=10,
                                                     max_offset=1.5)
                    pcs = dta.pc(aligned_snips)  #pc_refactor(aligned_snips)
                    order = 5
                    minpeaks = 3 if deltat < 2 else 10
                    labels = dta.cluster_events(pcs, peaks, order, 0.4, minpeaks,
                                                False, method='DBSCAN')
                    peaks = np.append(peaks, [labels], axis=0)
                    #dta.plot_events_on_data(peaks, blockdata)
                    num = 1
                    if idx > startblock:
                        dta.alignclusterlabels(labels, peaklist, peaks,
                                               data=blockdata)
                    peaks, peaklist = dta.ampwalkclassify3_refactor(
                        peaks, peaklist)  # classification by amplitude
                    minlen = 6
                    peaks = dta.discard_short_classes(peaks, minlen)
                    if len(peaks[0]) > 0:
                        peaks = dta.discard_wave_pulses(peaks, blockdata)
                    if plot_steps == True:
                        dta.plot_events_on_data(peaks, blockdata)
                        pass
                    peaklist.len = nblock
                    worldpeaks = np.copy(peaks)
                    worldpeaks[0] = worldpeaks[0] + (idx * nblock)
                    thisblock_eods = np.delete(worldpeaks, 3, 0)
                    if npmmp:
                        if idx == startblock:
                            if not os.path.exists(datasavepath):
                                os.makedirs(datasavepath)
                            mmpname = "eods_" + filename[:-3] + "npmmp"
                        # save the peaks of the current buffered part to a numpy-memmap on the disk
                        save_EOD_events_to_npmmp(thisblock_eods, eods_len,
                                                 idx == startblock,
                                                 datasavepath, mmpname)
                        eods_len += len(thisblock_eods[0])
                    else:
                        if idx > 0:
                            all_eods = np.concatenate((all_eods, thisblock_eods),
                                                      axis=1)
                        else:
                            all_eods = thisblock_eods
    #dta.plot_events_on_data(all_eods, data)
    print('returns analyzed EODs. Calculate frequencies using all of these but '
          'discard the data from the EODs within the lowest few percent of amplitude')
    if npmmp:
        all_eods = np.memmap(datasavepath + '/' + mmpname, dtype='float64',
                             mode='r+', shape=(4, eods_len), order='F')
    if save == 1:
        path = filename[:-4] + "/"
        if not os.path.exists(path):
            os.makedirs(path)
        if eods_len > 0:
            np.save(datasavepath + "/eods8_" + filename[:-3] + "npy", all_eods)
            print('Saved!')
        else:
            print('not saved')
    return all_eods
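# A minimal usage sketch for analyze_pulse_data, with a hypothetical recording
# name; the returned array carries the EOD attributes described in the
# docstring along its first axis:
#
#     eods = analyze_pulse_data('recording.WAV', deltat=30, thresh=0.04,
#                               save=False, npmmp=False, plot_steps=False)
#     print(eods.shape)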