def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('tanPath', help='file path to tanimoto matrix pickle, e.g. "../foo.pickle"')
    parser.add_argument('cosPath', help='file path to cosine matrix pickle, e.g. "../foo.pickle"')
    parser.add_argument('outPath', help='file path to merged matrix pickle, e.g. "../foo.pickle"')
    parser.add_argument('lmbd', help='between 0 and 1')
    args = parser.parse_args()
    
    print 'loading tanimoto matrix pickle...'
    f = open(args.tanPath)
    tanMatrix = cPickle.load(f)
    f.close()
    print 'loading cosine matrix pickle...'
    f = open(args.cosPath)
    cosMatrix = cPickle.load(f)
    f.close()
    
    result = copy(cosMatrix)
    length = len(tanMatrix)
    fish = ProgressFish(total = length )
    for i in range(length):
        result[i] = fishers_chiSquare_method( cosMatrix[i], tanMatrix[i], float(args.lmbd) )
        fish.animate(amount=i)
    print 'pickling to '+args.outPath
    f = open(args.outPath, 'w')
    cPickle.dump( result, f ) 
    f.close()
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'tanPath', help='file path to tanimoto matrix pickle, e.g. "../foo.pickle"')
    parser.add_argument(
        'cosPath', help='file path to cosine matrix pickle, e.g. "../foo.pickle"')
    parser.add_argument(
        'outPath', help='file path to merged matrix pickle, e.g. "../foo.pickle"')
    parser.add_argument('lmbd', help='between 0 and 1')
    args = parser.parse_args()

    print 'loading tanimoto matrix pickle...'
    f = open(args.tanPath)
    tanMatrix = cPickle.load(f)
    f.close()
    print 'loading cosine matrix pickle...'
    f = open(args.cosPath)
    cosMatrix = cPickle.load(f)
    f.close()

    result = copy(cosMatrix)
    length = len(tanMatrix)
    fish = ProgressFish(total=length)
    for i in range(length):
        result[i] = fishers_chiSquare_method(cosMatrix[i], tanMatrix[i],
                                             float(args.lmbd))
        fish.animate(amount=i)
    print 'pickling to ' + args.outPath
    f = open(args.outPath, 'w')
    cPickle.dump(result, f)
    f.close()
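Every snippet on this page follows the same progress-reporting pattern: construct a ProgressFish with the expected total, then call animate(amount=...) on each iteration. Below is a minimal, self-contained sketch of just that pattern; the workload is a stand-in.

from fish import ProgressFish

items = range(1000)                    # stand-in workload
fish = ProgressFish(total=len(items))  # total drives the progress display
for i, item in enumerate(items, 1):
    _ = item * item                    # stand-in for the real per-item work
    fish.animate(amount=i)             # report how far along we are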
Example #3
    def fit(self, x_train, y_train, x_dev=None, y_dev=None, batch_size=100):
        train_fn = theano.function(inputs=[self.batch_x, self.batch_y],
                                   outputs=self.cost,
                                   updates=self.updates,
                                   givens={
                                       x: self.batch_x,
                                       y: self.batch_y
                                   })

        train_set_iterator = DatasetMiniBatchIterator(self.rng,
                                                      x_train,
                                                      y_train,
                                                      batch_size=batch_size,
                                                      randomize=True)
        dev_set_iterator = DatasetMiniBatchIterator(self.rng,
                                                    x_dev,
                                                    y_dev,
                                                    batch_size=batch_size,
                                                    randomize=False)

        train_score = self._batch_score(train_set_iterator)
        dev_score = self._batch_score(dev_set_iterator)

        best_dev_error = numpy.inf
        epoch = 0
        timer_train = time.time()
        while epoch < n_epochs:
            avg_costs = []
            timer = time.time()
            fish = ProgressFish(total=len(train_set_iterator))
            for i, (x, y) in enumerate(train_set_iterator, 1):
                fish.animate(amount=i)

                avg_cost = train_fn(x, y)
                if type(avg_cost) == list:
                    avg_costs.append(avg_cost[0])
                else:
                    avg_costs.append(avg_cost)

            mean_cost = numpy.mean(avg_costs)
            mean_train_error = numpy.mean(train_score())
            dev_error = numpy.mean(dev_score())
            print(
                'epoch {} took {:.4f} seconds; '
                'avg costs: {:.4f}; train error: {:.4f}; '
                'dev error: {:.4f}'.format(epoch,
                                           time.time() - timer, mean_cost,
                                           mean_train_error, dev_error))

            if dev_error < best_dev_error:
                best_dev_error = dev_error
                best_params = [numpy.copy(p.get_value()) for p in params]
            epoch += 1

        print('Training took: {:.4f} seconds'.format(time.time() -
                                                     timer_train))
        for i, param in enumerate(best_params):
            params[i].set_value(param, borrow=True)
Example #4
def get_vector_list( refs, refs_base):

    fish = ProgressFish(total=len(refs_base))

    vector_list = ref_to_vector( refs_base[0], refs) # init
    for i in range(1, len(refs_base)):
        column_vector = ref_to_vector( refs_base[i],refs )
        vector_list = column_stack( [ vector_list , column_vector ] )
        fish.animate(amount=i)
    return vector_list
Example #5
def do_records(records):
    num_records = LIMIT
    fish = ProgressFish(total=num_records)
    for i, record in enumerate(records):
        fish.animate(amount=i)
        keys = record.keys()
        x = get_o_list(record, x_keys)
        y = get_o_list(record, y_keys)

        y_count = len(filter((lambda r: r != "\"\"" and r != "''" and r != ""), y))
        x_count = len(filter((lambda r: r != "\"\"" and r != "''" and r != ""), x))

        if y_count == 0 or x_count == 0: continue

        home_dep_details = [""]*16

        try:
            home_lsoa_code = record['"CEN_LSOA"'][1:-1]
            if home_lsoa_code != None and home_lsoa_code != '':
                home_dep_details = dep_2_mappings.find_one({'code': home_lsoa_code})['dep']
        except Exception as e:
            print "home - probably wales/scotland"
            print e

        x += home_dep_details

        sd = [""]*133

        try:
            sd2 = schools_data.find_one({'KS5_11SCHNAME': record["\"SCH_SCHOOLNAME\""][1:-1]}, {'_id': 0})
            if sd2 != None: sd = map((lambda k: sd2[k]), ["LURN", "LLA", "LESTAB", "LLAESTAB", "LSCHNAME", "LSTREET", "LLOCALITY", "LADDRESS3", "LTOWN", "LPOSTCODE", "LTELNUM", "LICLOSE", "LISNEW", "LMINORGROUP", "LNFTYPE", "LISPRIMARY", "LISSECONDARY", "LISPOST16", "LAGEL", "LAGEH", "LGENDER", "LSFGENDER", "LRELDENOM", "LADMPOL", "LNEWACFLAG", "KS5_11RECTYPE", "KS5_11ALPHAIND", "KS5_11REGION", "KS5_11LASORT", "KS5_11LEA", "KS5_11ESTAB", "KS5_11URN", "KS5_11SCHNAME_AC", "KS5_11SCHNAME", "KS5_11ADDRESS1", "KS5_11ADDRESS2", "KS5_11ADDRESS3", "KS5_11TOWN", "KS5_11PCODE", "KS5_11TELNUM", "KS5_11CONTFLAG", "KS5_11NFTYPE", "KS5_11RELDENOM", "KS5_11ADMPOL", "KS5_11GENDER1618", "KS5_11FEEDER", "KS5_11AGERANGE", "KS5_11ICLOSE", "KS5_11TABKS2", "KS5_11TAB15", "KS5_11EXAMCONF", "KS5_11DUMMY1", "KS5_11TPUP1618", "KS5_11TALLPUPA", "KS5_11TALLPPSA", "KS5_11TALLPPEA", "KS5_11PTPASS1L3", "KS5_11PTPASS2LV3", "KS5_11PTPASS3LV3", "KS5_11TALLPPS08", "KS5_11TALLPPS09", "KS5_11TALLPPS10", "KS5_11TALLPPE08", "KS5_11TALLPPE09", "KS5_11TALLPPE10", "ABS_11LA", "ABS_11ESTAB", "ABS_11URN", "ABS_11PERCTOT", "ABS_11PERCUA", "ABS_11PPERSABS15", "ABS_11PPERSABS20", "CFR_11URN", "CFR_11LANUMBER", "CFR_11LONDON/NON-LONDON", "CFR_11MEDIAN", "CFR_11PUPILS", "CFR_11FSM", "CFR_11FSMBAND", "CFR_11GRANTFUNDING", "CFR_11SELFGENINCOME", "CFR_11TOTALINCOME", "CFR_11TEACHINGSTAFF", "CFR_11SUPPLYTEACHERS", "CFR_11EDUCATIONSUPPORTSTAFF", "CFR_11PREMISES", "CFR_11BACKOFFICE", "CFR_11CATERING", "CFR_11OTHERSTAFF", "CFR_11ENERGY", "CFR_11LEARNINGRESOURCES", "CFR_11ICT", "CFR_11BOUGHTIN", "CFR_11OTHER", "CFR_11TOTALEXPENDITURE", "SWF_11LA", "SWF_11URN", "SWF_11NTEA", "SWF_11NTEAAS", "SWF_11NNONTEA", "SWF_11NFTETEA", "SWF_11NFTETEAAS", "SWF_11RATPUPTEA", "SWF_11SALARY", "CENSUS_11URN", "CENSUS_11LAESTAB", "CENSUS_11NUMFTE", "CENSUS_11TOTPUPSENDN", "CENSUS_11TSENSAP", "CENSUS_11TSENA", "CENSUS_11TOTSENST", "CENSUS_11TOTSENAP", "CENSUS_11PSENSAP", "CENSUS_11PSENA", "CENSUS_11PTOTSENST", "CENSUS_11PTOTSENAP", "CENSUS_11TOTPUPEALDN", "CENSUS_11NUMEAL", "CENSUS_11NUMENGFL", "CENSUS_11NUMUNCFL", "CENSUS_11PNUMEAL", "CENSUS_11PNUMENGFL", "CENSUS_11PNUMUNCFL", "CENSUS_11TOTPUPFSMDN", "CENSUS_11NUMFSM", "CENSUS_11NUMNOFSM", "CENSUS_11PNUMFSM", "CENSUS_11PNUMNOFSM", "OLA", "OURN", "OSCHOOLNAME", "OPHASE", "OREPORTURL"])
        except Exception as e:
            print "school details"
            print e

        x += sd

        school_dep_details = [""]*16

        try:
            school_postcode = record["\"SCH_POSTCODE\""][1:-1]
            school_lsoa_code = postcodes.find_one({'Postcode2': school_postcode}, {'Code':1})
            if school_lsoa_code != None:
                school_lsoa_code = school_lsoa_code['Code']
                school_dep_details = dep_2_mappings.find_one({'code': school_lsoa_code})['dep']
        except Exception as e:
            print "school deps"
            print e

        x += school_dep_details

        coll.insert({'x': x, 'y': y})
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'in_abs', help='input abstracts file path: "../*_abstracts.pickle" ')
    parser.add_argument(
        'out_stemmed_abs',
        default='../stemmed_abstracts.pickle',
        help='file path of abstracts output file: "stemmed_abstracts.pickle"')
    parser.add_argument(
        'out_words',
        default='../word_base.pickle',
        help='file path of words output file: "word_base.pickle"')

    args = parser.parse_args()

    print "loading abstracts..."
    abstracts_file = open(args.in_abs)
    abstracts = cPickle.load(abstracts_file)
    abstracts_file.close()

    words = []
    stemmed_abstracts = {}
    fish = ProgressFish(total=len(abstracts))

    cnt = 0
    print "reading all words..."
    for (key, abstract) in abstracts.items():
        sentence = wordpunct_tokenize(abstract.lower())
        new_sentence = []
        for word in sentence:
            if word.isalnum():
                stemmed_word = stem(word)
                words.append(stemmed_word)
                new_sentence.append(stemmed_word)
        stemmed_abstracts[key] = list(set(new_sentence))
        cnt += 1
        fish.animate(amount=cnt)

    print "removing duplicates"
    words = set(words)

    print "persisting word_base"
    word_base = open(args.out_words, 'w')
    cPickle.dump(words, word_base)
    word_base.close()

    print "persisting abstracts"
    stemmed_abstracts_file = open(args.out_stemmed_abs, 'w')
    cPickle.dump(stemmed_abstracts, stemmed_abstracts_file)
    stemmed_abstracts_file.close()
Example #7
File: audit_scores.py  Project: rerb/stars
def audit_scores(ss_id=None):
    """
        recalculates all scores and display the changes
    """

    reload(sys)
    sys.setdefaultencoding('utf8')

    if not ss_id:
        print "iterating all submission sets"
        cs = CreditSet.objects.get(pk=6)
        qs = SubmissionSet.objects.filter(status='r').filter(creditset=cs)
    else:
        print "auditing SS: %s" % ss_id
        qs = [SubmissionSet.objects.get(pk=ss_id)]

    display_table = []
    fish = ProgressFish(total=len(qs))

    count = 0
    for ss in qs:
        count += 1

        fish.animate(amount=count)

        # current_score = get_score_object(ss)
        # recalculate_all_scores(ss)
        # recalculated_score = get_score_object(ss)
        # compare scores
        s1 = get_score_obj(ss, credits=False)
        ss.get_STARS_score(recalculate=True)
        s2 = get_score_obj(ss, credits=False)
        compare_score_objects(s1, s2, display_table)

    #     current_score = round(ss.score, 2)
    #     recalculated_score = round(ss.get_STARS_score(recalculate=True), 2)
    #
    #     if abs(current_score - recalculated_score) > .1:
    #         display_table.append([
    #             ss, current_score, recalculated_score,
    #             current_score - recalculated_score, ss.date_submitted, ss.id])
    #
    # if display_table:
    print tabulate(display_table,
                   headers=[
                       'submission set', 'name', 'id', 'calculated_score',
                       'recalculated_score', 'delta'
                   ])
Example #8
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('in_abs', help='input abstracts file path: "../*_abstracts.pickle" ')
	parser.add_argument('out_stemmed_abs', default='../stemmed_abstracts.pickle', help='file path of abstracts output file: "stemmed_abstracts.pickle"')
	parser.add_argument('out_words', default='../word_base.pickle', help='file path of words output file: "word_base.pickle"')

	args = parser.parse_args()

	
	
	print "loading abstracts..."
	abstracts_file = open(args.in_abs)
	abstracts = cPickle.load(abstracts_file)
	abstracts_file.close()

	
	words = []
	stemmed_abstracts = {}
	fish = ProgressFish(total=len(abstracts))
	
	cnt = 0
	print "reading all words..."
	for (key, abstract) in abstracts.items():
		sentence = wordpunct_tokenize(abstract.lower())
		new_sentence = []
		for word in sentence:
			if word.isalnum():
				stemmed_word = stem(word)
				words.append( stemmed_word )
				new_sentence.append( stemmed_word )
		stemmed_abstracts[key] = list(set(new_sentence))
		cnt += 1
		fish.animate(amount=cnt)

	print "removing duplicates"
	words = set(words)

	print "persisting word_base"
	word_base = open(args.out_words, 'w')
	cPickle.dump(words, word_base)
	word_base.close()

	print "persisting abstracts"
	stemmed_abstracts_file = open(args.out_stemmed_abs, 'w')
	cPickle.dump(stemmed_abstracts, stemmed_abstracts_file)
	stemmed_abstracts_file.close()
Example #9
def convert_to_list( abstracts, references ):
	no_of_docs = len(abstracts)
	if len(references) != no_of_docs:
		print 'abstracts and refs must have same size!'
	abs_list = list()
	refs_list = list()
	keys_list = list()
	print len(references)
	
	cnt = 1
	fish = ProgressFish(total=len(abstracts))
	for key in references.keys():
		fish.animate(amount=cnt)
		cnt +=1
		abs_list.append( abstracts[key] )
		refs_list.append( references[key] )
		keys_list.append( key )
	return abs_list, refs_list, keys_list
Example #10
def convert_to_list(abstracts, references):
    no_of_docs = len(abstracts)
    if len(references) != no_of_docs:
        print 'abstracts and refs must have same size!'
    abs_list = list()
    refs_list = list()
    keys_list = list()
    print len(references)

    cnt = 1
    fish = ProgressFish(total=len(abstracts))
    for key in references.keys():
        fish.animate(amount=cnt)
        cnt += 1
        abs_list.append(abstracts[key])
        refs_list.append(references[key])
        keys_list.append(key)
    return abs_list, refs_list, keys_list
Example #11
def abstracts_to_vector(abstracts, word_base):

    print 'converting abstracts...'
    cnt = 0
    word_base_dict = {}
    for word in word_base:
        word_base_dict[word] = cnt
        cnt += 1

    cnt = 0
    fish = ProgressFish(total=len(abstracts))
    for key, abstract in abstracts.items():
        vector_abstract = abstract_to_vector(abstract, word_base_dict)
        abstracts[key] = vector_abstract
        cnt += 1
        fish.animate(amount=cnt)

    return abstracts
Example #12
def abstracts_to_vector( abstracts, word_base):
	
	print 'converting abstracts...'
	cnt = 0
	word_base_dict = {}
	for word in word_base:
		word_base_dict[word] = cnt
		cnt +=1

	cnt = 0
	fish = ProgressFish(total=len(abstracts))
	for key, abstract in abstracts.items():
		vector_abstract = abstract_to_vector( abstract, word_base_dict)
		abstracts[key] = vector_abstract
		cnt += 1
		fish.animate(amount=cnt)

	return abstracts
Example #13
  def fit(self, x_train, y_train, x_dev=None, y_dev=None, batch_size=100):
    train_fn = theano.function(inputs=[self.batch_x, self.batch_y],
                               outputs=self.cost,
                               updates=self.updates,
                               givens={x: self.batch_x, y: self.batch_y})

    train_set_iterator = DatasetMiniBatchIterator(self.rng, x_train, y_train, batch_size=batch_size, randomize=True)
    dev_set_iterator = DatasetMiniBatchIterator(self.rng, x_dev, y_dev, batch_size=batch_size, randomize=False)

    train_score = self._batch_score(train_set_iterator)
    dev_score = self._batch_score(dev_set_iterator)

    best_dev_error = numpy.inf
    epoch = 0
    timer_train = time.time()
    while epoch < n_epochs:
        avg_costs = []
        timer = time.time()
        fish = ProgressFish(total=len(train_set_iterator))
        for i, (x, y) in enumerate(train_set_iterator, 1):
            fish.animate(amount=i)

            avg_cost = train_fn(x, y)
            if type(avg_cost) == list:
              avg_costs.append(avg_cost[0])
            else:
              avg_costs.append(avg_cost)

        mean_cost = numpy.mean(avg_costs)
        mean_train_error = numpy.mean(train_score())
        dev_error = numpy.mean(dev_score())
        print('epoch {} took {:.4f} seconds; '
              'avg costs: {:.4f}; train error: {:.4f}; '
              'dev error: {:.4f}'.format(epoch,time.time() - timer, mean_cost,
                                         mean_train_error, dev_error))

        if dev_error < best_dev_error:
            best_dev_error = dev_error
            best_params = [numpy.copy(p.get_value()) for p in params]
        epoch += 1

    print('Training took: {:.4f} seconds'.format(time.time() - timer_train))
    for i, param in enumerate(best_params):
      params[i].set_value(param, borrow=True)
Example #14
def download_url(url, save_as):
    r = requests.get(url, cookies = {'_metacasts_session': SESSION_COOKIE}, stream=True)
    file_size = int(r.headers['content-length'])
    mime_type = r.headers['content-type']
    downloaded_bytes = 0

    file_name = save_as+guess_extension(mime_type)
    file_mode = 'wb'

    if( os.path.exists(file_name) ):
        existing_size = os.path.getsize(file_name)
        print 'size is {0} vs {1}'.format(existing_size, file_size)
        if( os.path.getsize(file_name) == file_size ):
            print file_name+" already exists. Skipping..."
            return
        else:
            print "File incomplete. Resuming..."
            file_mode = 'ab'
            r = requests.get(url, 
                             cookies = {'_metacasts_session': SESSION_COOKIE}, 
                             headers = {'Range': 'bytes={0}-'.format(existing_size)},
                             stream=True)
            downloaded_bytes = existing_size

            if( int(r.headers['content-length']) != file_size - existing_size ):
                print "File size mismatch. Reset download."
                os.remove(file_name)
                file_mode = 'wb'
                downloaded_bytes = 0
                

    with open(file_name, file_mode) as f:
        print "Downloading {0}...".format(file_name)
        pf = ProgressFish(total=file_size)
        for index, chunk in enumerate(r.iter_content(chunk_size=128*1024)):
            if chunk:
                downloaded_bytes += len(chunk)
                f.write(chunk)
                f.flush()
                pf.animate(amount=downloaded_bytes)

        print "{0} finished download".format(file_name)
Example #15
def random_sampling( abstracts, references, no_of_entries):
	length = check_lengths( abstracts, references)
	percentage = float(no_of_entries)/length*100
	print 'reduce '+str(length)+' to '+str(no_of_entries)
	print "that's about "+str(percentage)+"% of the original size "


	fish = ProgressFish(total = int(no_of_entries) )

	key_list = abstracts.keys()
	random.shuffle(key_list)
	new_abs = {}
	new_refs = {}

	for i in range( int(no_of_entries) ):
		fish.animate(amount=i)
		choice = key_list.pop()
		new_abs[choice] = abstracts[choice]
		new_refs[choice] = references[choice]
	return new_abs, new_refs
Example #16
def fetch(urls):
    now = get_datetime(gmtime())
    with futures.ThreadPoolExecutor(max_workers=5) as executor:
        future_to_url = {
            executor.submit(feedparser.parse, url): url
            for url in urls
        }

    fish = ProgressFish(total=len(urls))
    i = 0
    for future in futures.as_completed(future_to_url):
        url = future_to_url[future]
        if future.exception() is not None:
            log('Error reading %r: %s' % (url, future.exception()))
            continue

        feed = future.result()
        _feed, created = Feed.objects.get_or_create(url=url)
        for entry in feed.entries:
            try:
                pub_time = get_datetime(entry.date_parsed)
            except AttributeError:
                log('No date_parsed attribute on entry')
                continue
            except:
                log('Error reading entry date')
                continue

            try:
                assert pub_time - now < TIME_EPSILON
            except AssertionError:
                print >> sys.stderr, 'Entry is from future? %s %s' % (pub_time,
                                                                      url)

            _entry, created = Entry.objects.get_or_create(feed=_feed,
                                                          pub_time=pub_time)
            for item in get_items(entry):
                Item.objects.get_or_create(value=item, entry=_entry)

        fish.animate(amount=i)
        i += 1
Example #17
def main():
	source = open("../dense_abstracts.pickle")
	out_file = open('../english_abstracts.pickle', 'w')
	out_diff = open('../english_abstracts_diff.pickle', 'w')
	print str(datetime.now()) + " filterNonEnglishAbstracts.py - deletes entries with non-English abstracts"

	if not source:
		print "This works.... NOT"
		return -1
	
	deleted_abstract_ids = []
	print "reading abstracts..."
	abstracts = cPickle.load(source)
	abs_tobeginwith = float(len(abstracts))

	print "deleting non-English ones"
	empty_cnt = 0

	fish = ProgressFish(total = len(abstracts))
	count = 0
	for article_id, abstract in abstracts.items():
		count += 1
		fish.animate(amount=count)
		if 'en' != guessLanguage( abstract ):
			empty_cnt += 1
			deleted_abstract_ids.append(article_id)
			del abstracts[article_id]

	print str(datetime.now())+' starting to persist references to: '+ out_file.name +' and '+out_diff.name
	print "deleted "+str(empty_cnt)+" documents"
	print "that's "+str( empty_cnt / abs_tobeginwith )+"%"

	cPickle.dump(abstracts, out_file, -1)
	cPickle.dump(deleted_abstract_ids, out_diff, -1)

	source.close()
	out_file.close()
	out_diff.close()
Example #18
def most_refs_sampling( abstracts, references, no_of_entries):
	no_of_entries = int(no_of_entries)
	length = check_lengths( abstracts, references)
	percentage = float(no_of_entries)/length*100
	print 'reduce '+str(length)+' to '+str(no_of_entries)
	print "that's about "+str(percentage)+"% of the original size "

	fish = ProgressFish(total = int(no_of_entries) )

	new_abs = {}
	new_refs = {}
	count_refs = {}

	for key, refs in references.items():
		count_refs[key] = len(refs)
	refs_sorted_by_count = sorted(count_refs.iteritems(), key=operator.itemgetter(1))

	for i in range( int(no_of_entries) ):
		fish.animate(amount=i)
		(choice, count) = refs_sorted_by_count.pop()
		new_abs[choice] = abstracts[choice]
		new_refs[choice] = references[choice]
	return new_abs, new_refs
Example #19
def wait_for_completed_transfer(mountpoint, timeout_in_s=None):
    print "waiting for completed upload"
    if timeout_in_s is not None:
        print "waiting at most %s min" % (timeout_in_s/60)
    else:
        timeout_in_s = float("inf")
    CLOUDFUSION_NOT_UPLOADED_PATH = mountpoint + "/stats/notuploaded"
    time_waited = 0
    if os.path.exists(CLOUDFUSION_NOT_UPLOADED_PATH):
        if timeout_in_s == float("inf"):
            fsh = ProgressFish(total=10000000000)
        else:
            fsh = ProgressFish(total=timeout_in_s)
        while os.path.getsize(CLOUDFUSION_NOT_UPLOADED_PATH) > 0:
            sleep(10)
            time_waited += 10
            fsh.animate(amount=time_waited)
            if time_waited > timeout_in_s:
                break
        return
    print ""
    
    start = time.time()
    
    def no_network_activity(line):
        try:
            kbit_per_5min = sum(map(int, line.split()))
            if kbit_per_5min < 200:
                return True
        except ValueError:
            pass
        if start + timeout_in_s < time.time():
            return True
        return False
    p = ifstat('-bzn', '600', _out=no_network_activity)
    p.wait()
    p.kill()
Example #20
File: fetch.py  Project: dandavison/feeder
def fetch(urls):
    now = get_datetime(gmtime())
    with futures.ThreadPoolExecutor(max_workers=5) as executor:
        future_to_url = {executor.submit(feedparser.parse, url): url
                         for url in urls}

    fish = ProgressFish(total=len(urls))
    i = 0
    for future in futures.as_completed(future_to_url):
        url = future_to_url[future]
        if future.exception() is not None:
            log('Error reading %r: %s' % (url, future.exception()))
            continue

        feed = future.result()
        _feed, created = Feed.objects.get_or_create(url=url)
        for entry in feed.entries:
            try:
                pub_time = get_datetime(entry.date_parsed)
            except AttributeError:
                log('No date_parsed attribute on entry')
                continue
            except:
                log('Error reading entry date')
                continue

            try:
                assert pub_time - now < TIME_EPSILON
            except AssertionError:
                print >>sys.stderr, 'Entry is from future? %s %s' % (pub_time, url)

            _entry, created = Entry.objects.get_or_create(feed=_feed, pub_time=pub_time)
            for item in get_items(entry):
                Item.objects.get_or_create(value=item, entry=_entry)

        fish.animate(amount=i)
        i += 1
Example #21
    def handle(self, *args, **options):

        conn = boto.connect_s3()
        src = conn.get_bucket('aashe-hub-dev')
        dst = conn.get_bucket('aashe-hub-production')
        dst_keys = [k.key for k in dst.list()]

        print "Copying all Files..."

        file_qs = File.objects.filter(item__isnull=False)
        fish = ProgressFish(total=file_qs.count())
        count = 0
        for f in file_qs:
            count += 1
            fish.animate(amount=count)
            key = urlparse(f.item).path[1:]
            # if it doesn't already exist:
            if key not in dst_keys:
                dst.copy_key(key, src.name, key)

        print "Copying all Images..."

        image_qs = Image.objects.filter(image__isnull=False)
        fish2 = ProgressFish(total=image_qs.count())
        count = 0
        for i in image_qs:
            count += 1
            fish2.animate(amount=count)
            key = urlparse(i.image).path[1:]
            # if it doesn't already exist:
            if key not in dst_keys:
                try:
                    dst.copy_key(key, src.name, key)
                except boto.exception.S3ResponseError:
                    print "**** failed to copy: %s" % key

        print
Example #22
def main(config_path, desc_path, target_path):
    massaged = io.StringIO()
    with io.open(config_path, 'rU') as infile:
        massaged.writelines(line.lstrip() for line in infile)
    massaged.seek(0)
    config = RawConfigParser()
    config.readfp(massaged)
    lfs_url = config.get('lfs', 'url').strip('"')
    api_url = posixpath.join(lfs_url, 'objects', 'batch')

    with io.open(desc_path, 'rU') as infile:
        target = dict(line.strip().partition(' ')[::2] for line in infile)
    if target.get('version') != 'https://git-lfs.github.com/spec/v1':
        raise ValueError("can't handle lfs", target['version'])
    oid_type, sep, oid = target['oid'].partition(':')
    if oid_type != 'sha256':
        raise ValueError("can't handle oid", target['oid'])
    size = int(target['size'])

    sys.stderr.write('Fetching {!r} from lfs...\n'.format(
        os.path.basename(target_path)))
    try:
        infile = open(target_path, 'rb')
    except IOError as e:
        if e.errno != errno.ENOENT:
            raise
    else:
        if file_matches(infile, size, oid):
            sys.stderr.write('Lucky! It was already up to date.\n')
            return

    req = Request(api_url, json.dumps({
        'operation': 'download',
        'objects': [{
            'oid': oid,
            'size': size,
        }],
    }).encode(), {
        'Accept': JSON_TYPE,
        'Content-Type': JSON_TYPE,
    })
    with contextlib.closing(urlopen(req)) as respfile:
        if WRAP_RESPFILE:
            respfile = io.TextIOWrapper(respfile)
        resp = json.load(respfile)

    url = next(obj['actions']['download']['href']
               for obj in resp['objects']
               if obj['oid'] == oid)
    with contextlib.closing(urlopen(url)) as respfile:
        hasher = hashlib.sha256()
        with tempfile.NamedTemporaryFile(
                dir=os.path.dirname(target_path)) as outfile:
            fish = ProgressFish(total=size)
            fetched = 0
            for chunk in iter(lambda: respfile.read(8192), b''):
                fetched += len(chunk)
                fish.animate(amount=fetched)
                hasher.update(chunk)
                outfile.write(chunk)
            if hasher.hexdigest() != oid:
                raise ValueError('hash failure', hasher.hexdigest(), oid)
            os.rename(outfile.name, target_path)
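            # recreate an empty file at the temp path so NamedTemporaryFile's
            # cleanup can still unlink something after the rename moved it away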
            open(outfile.name, 'w').close()
Example #23
def progress(iterable, **kwargs):
    fish = ProgressFish(**kwargs)
    for i, item in enumerate(iterable):
        yield item
        fish.animate(amount=i)
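A hedged usage sketch for the wrapper above; the list and the per-item arithmetic are stand-ins for a real workload, and ProgressFish is assumed to be imported as in the other examples.

lines = ['alpha', 'beta', 'gamma'] * 500
total_chars = 0
for line in progress(lines, total=len(lines)):
    total_chars += len(line)    # stand-in for real per-item work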
Example #24
                c.description)

            if urls != []:
                chapter_data = {'name': c.name, 'urls': urls}
                chapters_with_web_sites.append(chapter_data)
            else:
                chapter_data = {'name': c.name, 'description': c.description}
                chapters_to_check.append(chapter_data)

    else:
        pass

sites_with_links_back = []
sites_with_no_links_back = []

fish = ProgressFish(total=len(chapters_with_web_sites))
for i, c in enumerate(chapters_with_web_sites):
    req = c['urls'][0]
    try:
        r = br.open(req)
        fish.animate(amount=i)
    except urllib2.HTTPError, e:
        print e.code
        continue
    c['urls'] = r.geturl()
    doc = r.read()
    soup = BeautifulSoup.BeautifulSoup(doc)
    try:
        href = soup.findAll('a',
                            {'href': re.compile('thezeitgeistmovement.com')})
        if href:
Example #25
def do_records(records):
    num_records = LIMIT
    fish = ProgressFish(total=num_records)
    for i, record in enumerate(records):
        fish.animate(amount=i)
        keys = record.keys()
        x = get_o_list(record, x_keys)
        y = get_o_list(record, y_keys)

        y_count = len(
            filter((lambda r: r != "\"\"" and r != "''" and r != ""), y))
        x_count = len(
            filter((lambda r: r != "\"\"" and r != "''" and r != ""), x))

        if y_count == 0 or x_count == 0: continue

        home_dep_details = [""] * 16

        try:
            home_lsoa_code = record['"CEN_LSOA"'][1:-1]
            if home_lsoa_code != None and home_lsoa_code != '':
                home_dep_details = dep_2_mappings.find_one(
                    {'code': home_lsoa_code})['dep']
        except Exception as e:
            print "home - probably wales/scotland"
            print e

        x += home_dep_details

        sd = [""] * 133

        try:
            sd2 = schools_data.find_one(
                {'KS5_11SCHNAME': record["\"SCH_SCHOOLNAME\""][1:-1]},
                {'_id': 0})
            if sd2 != None:
                sd = map((lambda k: sd2[k]), [
                    "LURN", "LLA", "LESTAB", "LLAESTAB", "LSCHNAME", "LSTREET",
                    "LLOCALITY", "LADDRESS3", "LTOWN", "LPOSTCODE", "LTELNUM",
                    "LICLOSE", "LISNEW", "LMINORGROUP", "LNFTYPE",
                    "LISPRIMARY", "LISSECONDARY", "LISPOST16", "LAGEL",
                    "LAGEH", "LGENDER", "LSFGENDER", "LRELDENOM", "LADMPOL",
                    "LNEWACFLAG", "KS5_11RECTYPE", "KS5_11ALPHAIND",
                    "KS5_11REGION", "KS5_11LASORT", "KS5_11LEA", "KS5_11ESTAB",
                    "KS5_11URN", "KS5_11SCHNAME_AC", "KS5_11SCHNAME",
                    "KS5_11ADDRESS1", "KS5_11ADDRESS2", "KS5_11ADDRESS3",
                    "KS5_11TOWN", "KS5_11PCODE", "KS5_11TELNUM",
                    "KS5_11CONTFLAG", "KS5_11NFTYPE", "KS5_11RELDENOM",
                    "KS5_11ADMPOL", "KS5_11GENDER1618", "KS5_11FEEDER",
                    "KS5_11AGERANGE", "KS5_11ICLOSE", "KS5_11TABKS2",
                    "KS5_11TAB15", "KS5_11EXAMCONF", "KS5_11DUMMY1",
                    "KS5_11TPUP1618", "KS5_11TALLPUPA", "KS5_11TALLPPSA",
                    "KS5_11TALLPPEA", "KS5_11PTPASS1L3", "KS5_11PTPASS2LV3",
                    "KS5_11PTPASS3LV3", "KS5_11TALLPPS08", "KS5_11TALLPPS09",
                    "KS5_11TALLPPS10", "KS5_11TALLPPE08", "KS5_11TALLPPE09",
                    "KS5_11TALLPPE10", "ABS_11LA", "ABS_11ESTAB", "ABS_11URN",
                    "ABS_11PERCTOT", "ABS_11PERCUA", "ABS_11PPERSABS15",
                    "ABS_11PPERSABS20", "CFR_11URN", "CFR_11LANUMBER",
                    "CFR_11LONDON/NON-LONDON", "CFR_11MEDIAN", "CFR_11PUPILS",
                    "CFR_11FSM", "CFR_11FSMBAND", "CFR_11GRANTFUNDING",
                    "CFR_11SELFGENINCOME", "CFR_11TOTALINCOME",
                    "CFR_11TEACHINGSTAFF", "CFR_11SUPPLYTEACHERS",
                    "CFR_11EDUCATIONSUPPORTSTAFF", "CFR_11PREMISES",
                    "CFR_11BACKOFFICE", "CFR_11CATERING", "CFR_11OTHERSTAFF",
                    "CFR_11ENERGY", "CFR_11LEARNINGRESOURCES", "CFR_11ICT",
                    "CFR_11BOUGHTIN", "CFR_11OTHER", "CFR_11TOTALEXPENDITURE",
                    "SWF_11LA", "SWF_11URN", "SWF_11NTEA", "SWF_11NTEAAS",
                    "SWF_11NNONTEA", "SWF_11NFTETEA", "SWF_11NFTETEAAS",
                    "SWF_11RATPUPTEA", "SWF_11SALARY", "CENSUS_11URN",
                    "CENSUS_11LAESTAB", "CENSUS_11NUMFTE",
                    "CENSUS_11TOTPUPSENDN", "CENSUS_11TSENSAP",
                    "CENSUS_11TSENA", "CENSUS_11TOTSENST", "CENSUS_11TOTSENAP",
                    "CENSUS_11PSENSAP", "CENSUS_11PSENA", "CENSUS_11PTOTSENST",
                    "CENSUS_11PTOTSENAP", "CENSUS_11TOTPUPEALDN",
                    "CENSUS_11NUMEAL", "CENSUS_11NUMENGFL",
                    "CENSUS_11NUMUNCFL", "CENSUS_11PNUMEAL",
                    "CENSUS_11PNUMENGFL", "CENSUS_11PNUMUNCFL",
                    "CENSUS_11TOTPUPFSMDN", "CENSUS_11NUMFSM",
                    "CENSUS_11NUMNOFSM", "CENSUS_11PNUMFSM",
                    "CENSUS_11PNUMNOFSM", "OLA", "OURN", "OSCHOOLNAME",
                    "OPHASE", "OREPORTURL"
                ])
        except Exception as e:
            print "school details"
            print e

        x += sd

        school_dep_details = [""] * 16

        try:
            school_postcode = record["\"SCH_POSTCODE\""][1:-1]
            school_lsoa_code = postcodes.find_one(
                {'Postcode2': school_postcode}, {'Code': 1})
            if school_lsoa_code != None:
                school_lsoa_code = school_lsoa_code['Code']
                school_dep_details = dep_2_mappings.find_one(
                    {'code': school_lsoa_code})['dep']
        except Exception as e:
            print "school deps"
            print e

        x += school_dep_details

        coll.insert({'x': x, 'y': y})
Example #26
                           unemployments.find())
unemployment_average = sum(unemployment_average) / len(unemployment_average)

crime_average = map(lambda x: float(x['Total']), crimes.find())
crime_average = sum(crime_average) / len(crime_average)

turnout_average = map(lambda x: float(x['TurnoutPercentage'].strip('%')),
                      election_results.find())
turnout_average = sum(turnout_average) / len(turnout_average)

expense_average = map(lambda x: float(x['Total Claimed']), expenses.find())
expense_average = sum(expense_average) / len(expense_average)

LIMIT = int(1e4)

fish = ProgressFish(total=ms2010.count())
data = ms2010.find({"towhy": "still_in_office"}).limit(LIMIT)
for i, datum in enumerate(data):
    fish.animate(amount=i)
    mp = {}

    mp['first_name'] = datum['firstname']
    mp['last_name'] = datum['lastname']
    mp['party'] = datum['party']

    prev_datum = ms.find_one({
        'constituency': datum['constituency'],
        'todate': '2010-04-12'
    })
    mp_change = prev_datum == None or prev_datum['firstname'] != datum[
        'firstname'] or prev_datum['lastname'] != datum['lastname']
Example #27
            x = float(x)
        except ValueError as e:
            return -1
    else:
        try:
            x = mapping[x]
        except KeyError as e:
            # print "skipping due to not in mapping"
            return -1

    return x

num = coll.count() if coll.count() < LIMIT else LIMIT
X = np.zeros((num, 266))
Y = np.zeros((num, 183))

data = coll.find().limit(LIMIT)
fish = ProgressFish(total=LIMIT)
for i, record in enumerate(data):
    fish.animate(amount=i)
    X[i] = map(x_to_num, enumerate(record['x']))
    Y[i] = map(y_to_num, record['y'])

data_x = open("DATA_X", "w")
cPickle.dump(X, data_x)
data_x.close()

data_y = open("DATA_Y", "w")
cPickle.dump(Y, data_y)
data_y.close()
Example #28
import pandas
import io
import requests
from fish import ProgressFish

import utils

base_url = "http://api.openhluttaw.org"

df = pandas.DataFrame.from_csv('data/posts.csv',header=0,index_col=False)
df = df.where((pandas.notnull(df)), None)

del df['popit_id']

posts = df.to_dict(orient='records')

fish = ProgressFish(total=len(posts))

for progress, post in enumerate(posts):

    pyithu = [i.strip() for i in post['constituency_pyithu'].split(',')]
    amyotha = [i.strip() for i in post['constituency_amyotha'].split(',')]

    pyithu_ids = [utils.search_post(id, base_url)['id'] for id in pyithu]
    amyotha_ids = [utils.search_post(id, base_url)['id'] for id in amyotha]

    pyithu_list = []

    for pyithu_id in pyithu_ids:
        r = requests.get(base_url + '/en/posts/' + pyithu_id)
        en_label = r.json()['result']['label']
        r = requests.get(base_url + '/my/posts/' + pyithu_id)
Example #29
import pymongo, simplejson, urllib, csv, time
from fish import ProgressFish

conn = pymongo.Connection()
db = conn.derisive
coll = db.schools_data

fields = open('mongo_hashtables.txt', 'r').read().split("\n")[0:-1]
fish = ProgressFish(total=len(fields))
for i, field in enumerate(fields):
    fish.animate(amount=i)
    uniques = "\n".join(coll.distinct(field))
    open("hashtables/%s" % field.replace("/", ""), 'w').write(uniques)

LIMIT = 1e4
LIMIT = int(LIMIT)

conn = pymongo.Connection()
db = conn.derisive
Example #30
def analyze_long_pulse_data_file(filepath,
                                 save=0,
                                 plot_steps=0,
                                 new=1,
                                 starttime=0,
                                 endtime=0):
    """
    analyzes timeseries of a pulse fish EOD recording
    """
    #    Script to detect and classify EODs in recordings of weakly electric pulse
    #    fish, Dexter Früh, 2018
    #
    #    results will be saved in workingdirectory/recording/
    #
    #    input:
    #      -  [Recorded Timeseries] recording.WAV
    #    outputs(optional):
    #      -  [Detected and Classified EODs]
    #            (Numpy Array with Shape (Number of EODs, 4 (Attributes of EODs)),
    #            with the EOD-Attributes
    #               -   x-location of the EOD
    #                       (time/x-coordinate/datapoint in recording)
    #               -   y-location of the EOD
    #                       (Amplitude of the positive peak of the pulse-EOD)
    #               -   height of the EOD (largest distance between peak and trough in the EOD)
    #               -   class of the EOD
    #           eods_recording.npy
    #      -   [plots of the results of each analyse step for each
    #               analysepart (timeinterval of length = deltat) of the recording]
    #
    #    required command line arguments at function call
    #        - save  : if True, save the results to a numpy file (possibly
    #                                                          overwrite existing)
    #        - plot  : if True, plot results in each analysestep
    #        - new   : if True, do a new analysis of the recording, even if there
    #                       is an existing analyzed .npy file with the right name.
    #
    import sys
    import numpy as np
    import copy
    from scipy.stats import gmean
    from scipy import stats
    from scipy import signal
    from scipy import optimize
    import matplotlib
    from fish import ProgressFish
    import matplotlib.pyplot as plt
    from thunderfish.dataloader import open_data
    from thunderfish.peakdetection import detect_peaks
    from scipy.interpolate import interp1d
    from scipy.signal import savgol_filter
    from collections import deque
    import ntpath
    import nixio as nix
    import time
    import os
    from shutil import copy2
    from ownDataStructures import Peak, Tr, Peaklist
    import DextersThunderfishAddition as dta
    from IPython import embed
    # parameters for the analysis

    deltat = 30.0  # seconds of buffer size
    thresh = 0.04  # minimal threshold for peakdetection
    peakwidth = 20  # width of a peak and minimal distance between two EODs
    # basic parameters for thunderfish.dataloader.open_data
    verbose = 0
    channel = 0
    ultimate_threshold = thresh + 0.01
    startblock = 0
    # timeinterval to analyze other than the whole recording
    #starttime = 0
    #endtime = 0
    #timegiven =  0
    home = os.path.expanduser('~')
    os.chdir(home)
    new = int(sys.argv[4])
    save = int(sys.argv[2])
    plot = int(sys.argv[3])
    starttime = int(starttime)
    endtime = int(endtime)
    timegiven = False
    if endtime > starttime >= 0:
        timegiven = True
    peaks = np.array([])
    troughs = np.array([])
    filename = path_leaf(filepath)
    datasavepath = filename[:-4]
    proceed = input(
        'Currently operates in home directory. If given a pulsefish recording filename.WAV, then a folder filename/ will be created in the home directory and all relevant files will be stored there. continue? [y/n] '
    ).lower()
    if proceed != 'y':
        quit()
    if not os.path.exists(datasavepath):
        os.makedirs(datasavepath)
    if save == 1:
        print('files will be saved to: ', datasavepath)
    eods_len = 0
    # starting analysis
    if new == 1 or not os.path.exists(filename[:-4] + "/eods5_" +
                                      filename[:-3] + "npy"):
        if filepath != home + '/' + datasavepath + '/' + filename:
            print(filepath, datasavepath + '/' + filename)
            proceed = input(
                'Copy datafile to ' + datasavepath +
                ' where all the other files will be stored? [y/n] ').lower()
            if proceed == 'y':
                copy2(filepath, datasavepath)
        # import data
        with open_data(filepath, channel, deltat, 0.0, verbose) as data:
            samplerate = data.samplerate
            nblock = int(deltat * data.samplerate)

            # selected time interval
            if timegiven == True:
                parttime1 = starttime * samplerate
                parttime2 = endtime * samplerate
                data = data[parttime1:parttime2]

            #split data into blocks
            if len(data) % nblock != 0:
                blockamount = len(data) // nblock + 1
            else:
                blockamount = len(data) // nblock

            # progress bar
            print('blockamount: ', blockamount)
            progress = 0
            print(progress, '%', flush=True, end=" ")
            fish = ProgressFish(total=blockamount)

            # blockwise analysis
            for idx in range(0, blockamount):
                blockdata = data[idx * nblock:(idx + 1) * nblock]
                # progressbar
                if progress < (idx * 100 // blockamount):
                    progress = (idx * 100) // blockamount
                progressstr = ' Filestatus: '
                fish.animate(amount=idx, dexextra=progressstr)
                #---analysis-----------------------------------------------------------------------
                # step1: detect peaks in timeseries
                pk, tr = detect_peaks(blockdata, thresh)
                troughs = tr
                # continue with analysis only if multiple peaks are detected
                if len(pk) > 3:
                    peaks = dta.makeeventlist(pk, tr, blockdata, peakwidth)

                    #dta.plot_events_on_data(peaks, blockdata)
                    peakindices, peakx, peakh = dta.discardnearbyevents(
                        peaks[0], peaks[1], peakwidth)
                    peaks = peaks[:, peakindices]

                    if len(peaks) > 0:
                        # used to connect the results of the current block with the previous
                        if idx > startblock:
                            peaklist = dta.connect_blocks(peaklist)
                        else:
                            peaklist = Peaklist([])
                        aligned_snips = dta.cut_snippets(blockdata,
                                                         peaks[0],
                                                         15,
                                                         int_met="cubic",
                                                         int_fact=10,
                                                         max_offset=1.5)
                        pcs = dta.pc(
                            aligned_snips)  #pc_refactor(aligned_snips)
                        order = 5
                        minpeaks = 3 if deltat < 2 else 10
                        labels = dta.cluster_events(pcs,
                                                    peaks,
                                                    order,
                                                    0.4,
                                                    minpeaks,
                                                    False,
                                                    method='DBSCAN')
                        peaks = np.append(peaks, [labels], axis=0)
                        #dta.plot_events_on_data(peaks, blockdata)
                        num = 1
                        if idx > startblock:
                            dta.alignclusterlabels(labels,
                                                   peaklist,
                                                   peaks,
                                                   data=blockdata)
                        peaks, peaklist = dta.ampwalkclassify3_refactor(
                            peaks, peaklist)  # classification by amplitude
                        minlen = 6  # >=1
                        peaks = dta.discard_short_classes(peaks, minlen)
                        if len(peaks[0]) > 0:
                            peaks = dta.discard_wave_pulses(peaks, blockdata)
                        # plots the data part and its detected and classified peaks
                        if plot_steps == True:
                            dta.plot_events_on_data(peaks, blockdata)
                            pass
                    worldpeaks = np.copy(peaks)
                    # change peaks location in the buffered part to the location relative to the
                    peaklist.len = nblock
                    # peaklocations relative to whole recording
                    worldpeaks[0] = worldpeaks[0] + (idx * nblock)
                    thisblock_eods = np.delete(peaks, 3, 0)
                    # save the peaks of the current buffered part to a numpy-memmap on the disk
                    mmpname = "eods_" + filename[:-3] + "npmmp"
                    save_EOD_events_to_npmmp(thisblock_eods, eods_len,
                                             idx == startblock, datasavepath,
                                             mmpname)
                    eods_len += len(thisblock_eods[0])
        # after the last buffered part has finished, save the memory mapped
        # numpy file of the detected and classified EODs to a .npy file to the
        # disk
        eods = np.memmap(datasavepath + "/eods_" + filename[:-3] + "npmmp",
                         dtype='float64',
                         mode='r+',
                         shape=(4, eods_len),
                         order='F')
        if save == 1:
            path = datasavepath + "/"
            if not os.path.exists(path):
                os.makedirs(path)
            if eods_len > 0:
                print('Saved!')
                np.save(datasavepath + "/eods8_" + datasavepath + "npy", eods)
            else:
                #np.save(filename[:-4]+"/eods5_"+filename[:-3]+"npy", thisblock_eods)

                print('not saved')
    else:  # if there already has been a certain existing result file and 'new' was set to False
        print('already analyzed')
    print(
        'returns analyzed EODs. Calculate frequencies using all of these, but discard the data from the EODs within the lowest few percent of amplitude'
    )
    return eods
Example #31
            'kind', 'runtimes', 'year', 'title']
    # if in_map.has_key('kind') == True:
    #     print in_map['kind'], type(in_map['kind'])
    for k in keys:
        if in_map.has_key(k):
            if type(in_map[k]) == int or type(in_map[k]) == float:
                in_dict[0].update({k: in_map[k]})
            if type(in_map[k]) == unicode:
                in_dict[0].update({k: in_map[k].encode('utf-8')})
            # if in_map.has_key('kind'):
            #     pdb.set_trace()
            if type(in_map[k]) == list:
                handle_lists(in_map[k], in_dict, k)

result = []
fish = ProgressFish(total=len(movie_ids))

for index, i in enumerate(movie_ids):
    # if index%100 == 0:
    #     print "On movie number: ", index
    fish.animate(amount=index)
    m = im.get_movie(i)
    maps = {}
    for keys in m.iterkeys():
      #  print keys, m[keys]
        maps.update({keys:m[keys]})
        dicts = [{}]

    if maps != {} : ## To ensure that maps are filled, otherwise dicts is not defined! 
        makerows(maps, dicts)
#        print dicts
Example #32
'''
VirtualBox

'''

import uuid
import time

from fish import ProgressFish
from vboxapi import VirtualBoxManager

fish = ProgressFish(total=100)
manager = VirtualBoxManager(None, None)
vbox = manager.vbox


def createMachine(name, uuid=None, settings_file=None, groups=[],
                  os_type_id='Debian', flags='', force_overwrite=False):
    ''' Create virtual machine

    Version 4.1.18:

    :param settingsFile: Fully qualified path where the settings file should
                         be created, or NULL for a default folder and file
                         based on the name argument.
    :param name: Machine name.
    :param osTypeId: Guest OS Type ID.
    :param id: Machine UUID (optional).
    :param forceOverwrite: If true, an existing machine settings file will be
                           overwritten.
    :returns: IProgress
Example #33

records = list(
    get_p(ks51.find({}, {
        '"SCH_POSTCODE"': 1
    }).limit(LIMIT)).intersection(
        get_p(ks52.find({}, {
            '"SCH_POSTCODE"': 1
        }).limit(LIMIT))))
postcodes = map((lambda r: r[1:-1].replace(' ', '')), records)

print len(postcodes)

i = 0

fish = ProgressFish(total=len(postcodes))

for postcode in postcodes:
    time.sleep(0.1)
    fish.animate(amount=i)
    try:
        i += 1
        url = "http://mapit.mysociety.org/postcode/%s" % postcode
        result = simplejson.load(urllib.urlopen(url))
        lsoa_code = filter((lambda area: area['type_name'] ==
                            "Lower Layer Super Output Area (Full)"),
                           result['areas'].values())[0]['name']
        mappings.insert({'lsoa_code': lsoa_code, 'postcode': postcode})

    except Exception as e:
        print "NOO"
Example #34
ks51 = db.ks5_0910
ks52 = db.ks5_1011
mappings = db.postcode_lsoa
mappings.drop()

def get_p(di): return set(map((lambda r: r['"SCH_POSTCODE"']), di))

records = list(get_p(ks51.find({},{'"SCH_POSTCODE"':1}).limit(LIMIT)).intersection(get_p(ks52.find({},{'"SCH_POSTCODE"':1}).limit(LIMIT))))
postcodes = map((lambda r: r[1:-1].replace(' ', '')), records)
 
print len(postcodes)

i = 0

fish = ProgressFish(total=len(postcodes))

for postcode in postcodes:
    time.sleep(0.1)
    fish.animate(amount=i)
    try:
        i += 1
        url = "http://mapit.mysociety.org/postcode/%s" % postcode
        result = simplejson.load(urllib.urlopen(url))
        lsoa_code = filter((lambda area: area['type_name'] == "Lower Layer Super Output Area (Full)"), result['areas'].values())[0]['name']
        mappings.insert({'lsoa_code': lsoa_code, 'postcode': postcode})

    except Exception as e:
        print "NOO"

mappings.create_index([("postcode", pymongo.ASCENDING)])
Example #35
def analyze_pulse_data(filepath,
                       absolutepath=True,
                       deltat=30,
                       thresh=0.04,
                       starttime=0,
                       endtime=0,
                       savepath=False,
                       save=False,
                       npmmp=False,
                       plot_steps=False,
                       plot_result=False):
    '''
    analyzes timeseries of a pulse fish EOD recording

    Parameters
    ----------
    filepath: WAV-file with the recorded timeseries

    deltat: int, optional
        time for a single analysisblock (recommended less than a minute, due to principal component clustering on the EOD-waveforms)

    thresh: float, optional
        minimum threshold for the peak detection (if computing frequencies, it is recommended to set this slightly lower than the desired threshold and instead discard EODs below the desired threshold after computing the frequencies for each EOD)

    starttime: int or str of int, optional
        time into the data from where to start the analysis, in seconds.

    endtime: int or str of int, optional
        time into the data where to end the analysis, in seconds; must be larger than starttime.

    savepath: Boolean or str, optional
        path where results and intermediate results are saved; only needed if save or npmmp is True.
        A string specifies a relative path to the directory where results and intermediate results will be saved,
        False uses the preset savepath, which is ~/filepath/,
        and True prompts for the savepath while the script is running.

    save: Boolean, optional
        True to save the results into a npy file at the savepath

    npmmp: Boolean, optional
        True to save intermediate results into a npmmp at the savepath, only recommended in case of memory overflow

    plot_steps: Boolean, optional
        True to plot the results of each analysis block

    plot_result: Boolean, optional
        True to plot the results of the final analysis. Not recommended for long recordings due to %TODO

    Returns
    -------
    eods: numpy array
        2D numpy array. first axis: attributes of an EOD (x (datapoints), y (recorded voltage), height (difference from maximum to minimum), class), second axis: EODs in chronological order.
    '''
    import sys
    import numpy as np
    import copy
    from scipy.stats import gmean
    from scipy import stats
    from scipy import signal
    from scipy import optimize
    import matplotlib
    from fish import ProgressFish
    import matplotlib.pyplot as plt
    from thunderfish.dataloader import open_data
    from thunderfish.peakdetection import detect_peaks
    from scipy.interpolate import interp1d
    from scipy.signal import savgol_filter
    from collections import deque
    import ntpath
    import nixio as nix
    import time
    import os
    from shutil import copy2
    from ownDataStructures import Peak, Tr, Peaklist
    import DextersThunderfishAddition as dta
    from IPython import embed
    # parameters for the analysis
    thresh = 0.04  # minimal threshold for peakdetection
    peakwidth = 20  # width of a peak and minimal distance between two EODs
    # basic parameters for thunderfish.dataloader.open_data
    verbose = 0
    channel = 0
    ultimate_threshold = thresh + 0.01
    startblock = 0
    # timeinterval to analyze other than the whole recording
    #starttime = 0
    #endtime = 0
    #timegiven =  0
    home = os.path.expanduser('~')
    if absolutepath:
        filepath = home + '/' + filepath
    #os.chdir(home)
    #save = int(save)
    #plot_steps = int(plot_steps)
    starttime = int(starttime)
    endtime = int(endtime)
    timegiven = False
    if endtime > starttime >= 0:
        timegiven = True
    peaks = np.array([])
    troughs = np.array([])
    filename = path_leaf(filepath)
    eods_len = 0
    if savepath == False:
        datasavepath = home + '/' + filename[:-4]
    elif savepath == True:
        datasavepath = input(
            'With the option npmmp enabled, a numpy memmap will be saved to: '
        ).lower()
    else:
        datasavepath = savepath

    if save and (
            os.path.exists(datasavepath + "/eods8_" + filename[:-3] + "npy") or
            os.path.exists(datasavepath + "/eods5_" + filename[:-3] + "npy")):
        print(
            'there already exists an analyzed file, aborting. Change the code if you don\'t want to abort'
        )
        quit()
    if npmmp:
        #proceed = input('With the option npmmp enabled, a numpy memmap will be saved to ' + datasavepath + '. continue? [y/n] ').lower()
        proceed = 'y'
        if proceed != 'y':
            quit()
    # starting analysis
    with open_data(filepath, channel, deltat, 0.0, verbose) as data:

        samplerate = data.samplerate

        # selected time interval
        if timegiven == True:
            parttime1 = starttime * samplerate
            parttime2 = endtime * samplerate
            data = data[parttime1:parttime2]

        #split data into blocks
        nblock = int(deltat * samplerate)
        if len(data) % nblock != 0:
            blockamount = len(data) // nblock + 1
        else:
            blockamount = len(data) // nblock
        print('blockamount: ', blockamount)
        progress = 0
        print(progress, '%', flush=True, end=" ")
        fish = ProgressFish(total=blockamount)
        for idx in range(0, blockamount):
            blockdata = data[idx * nblock:(idx + 1) * nblock]
            if progress < (idx * 100 // blockamount):
                progress = (idx * 100) // blockamount
            progressstr = ' Filestatus: '
            fish.animate(amount=idx, dexextra=progressstr)
            pk, tr = detect_peaks(blockdata, thresh)
            troughs = tr
            if len(pk) > 3:
                peaks = dta.makeeventlist(pk, tr, blockdata, peakwidth)
                peakindices, peakx, peakh = dta.discardnearbyevents(
                    peaks[0], peaks[1], peakwidth)
                peaks = peaks[:, peakindices]
                if len(peaks) > 0:
                    if idx > startblock:
                        peaklist = dta.connect_blocks(peaklist)
                    else:
                        peaklist = Peaklist([])
                    aligned_snips = dta.cut_snippets(blockdata,
                                                     peaks[0],
                                                     15,
                                                     int_met="cubic",
                                                     int_fact=10,
                                                     max_offset=1.5)
                    pcs = dta.pc(aligned_snips)  #pc_refactor(aligned_snips)
                    order = 5
                    minpeaks = 3 if deltat < 2 else 10
                    labels = dta.cluster_events(pcs,
                                                peaks,
                                                order,
                                                0.4,
                                                minpeaks,
                                                False,
                                                method='DBSCAN')
                    peaks = np.append(peaks, [labels], axis=0)
                    #dta.plot_events_on_data(peaks, blockdata)
                    num = 1
                    if idx > startblock:
                        dta.alignclusterlabels(labels,
                                               peaklist,
                                               peaks,
                                               data=blockdata)
                    peaks, peaklist = dta.ampwalkclassify3_refactor(
                        peaks, peaklist)  # classification by amplitude
                    minlen = 6
                    peaks = dta.discard_short_classes(peaks, minlen)
                    if len(peaks[0]) > 0:
                        peaks = dta.discard_wave_pulses(peaks, blockdata)
                    if plot_steps == True:
                        dta.plot_events_on_data(peaks, blockdata)
                        pass
                    peaklist.len = nblock
                    worldpeaks = np.copy(peaks)
                    worldpeaks[0] = worldpeaks[0] + (idx * nblock)
                    thisblock_eods = np.delete(worldpeaks, 3, 0)
                    if npmmp:
                        if idx == startblock:
                            if not os.path.exists(datasavepath):
                                os.makedirs(datasavepath)
                            mmpname = "eods_" + filename[:-3] + "npmmp"
                        # save the peaks of the current buffered part to a numpy-memmap on the disk
                        save_EOD_events_to_npmmp(thisblock_eods, eods_len,
                                                 idx == startblock,
                                                 datasavepath, mmpname)
                        eods_len += len(thisblock_eods[0])
                    else:
                        if idx > 0:
                            all_eods = np.concatenate(
                                (all_eods, thisblock_eods), axis=1)
                        else:
                            all_eods = thisblock_eods
    #dta.plot_events_on_data(all_eods,data)
    print(
        'returns analyzed EODs. Calculate frequencies using all of these, but discard the data from the EODs within the lowest few percent of amplitude'
    )
    if npmmp:
        all_eods = np.memmap(datasavepath + '/' + mmpname,
                             dtype='float64',
                             mode='r+',
                             shape=(4, eods_len),
                             order='F')
    if save == 1:
        path = filename[:-4] + "/"
        if not os.path.exists(path):
            os.makedirs(path)
        if eods_len > 0:
            np.save(datasavepath + "/eods8_" + filename[:-3] + "npy", all_eods)
            print('Saved!')
        else:
            print('not saved')
    return all_eods
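A hedged usage sketch based on the docstring above; the recording path and savepath are illustrative, not part of the original module.

eods = analyze_pulse_data('recordings/pulsefish.WAV',
                          absolutepath=False,
                          deltat=30,
                          thresh=0.04,
                          save=True,
                          savepath='pulsefish_results')
# eods is a 2D array: row 0 = sample index (x), row 1 = recorded voltage (y),
# row 2 = EOD height, row 3 = assigned class; columns are EODs in
# chronological order.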