def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('tanPath',
                        help='file path to tanimoto matrix pickle: "../foo.pickle"')
    parser.add_argument('cosPath',
                        help='file path to cosine matrix pickle: "../foo.pickle"')
    parser.add_argument('outPath',
                        help='file path to merged matrix pickle: "../foo.pickle"')
    parser.add_argument('lmbd', help='between 0 and 1')
    args = parser.parse_args()

    print 'loading tanimoto matrix pickle...'
    f = open(args.tanPath)
    tanMatrix = cPickle.load(f)
    f.close()

    print 'loading cosine matrix pickle...'
    f = open(args.cosPath)
    cosMatrix = cPickle.load(f)
    f.close()

    result = copy(cosMatrix)
    length = len(tanMatrix)
    fish = ProgressFish(total=length)
    for i in range(length):
        result[i] = fishers_chiSquare_method(cosMatrix[i], tanMatrix[i],
                                             float(args.lmbd))
        fish.animate(amount=i)

    print 'pickling to ' + args.outPath
    f = open(args.outPath, 'w')
    cPickle.dump(result, f)
    f.close()
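# A minimal invocation sketch (not part of the original script): assuming this
# main() is exposed via an "if __name__ == '__main__': main()" guard in a file
# named merge_matrices.py (a hypothetical name), it would be run as e.g.
#
#     python merge_matrices.py ../tanimoto.pickle ../cosine.pickle ../merged.pickle 0.5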
def fit(self, x_train, y_train, x_dev=None, y_dev=None, batch_size=100):
    train_fn = theano.function(inputs=[self.batch_x, self.batch_y],
                               outputs=self.cost,
                               updates=self.updates,
                               givens={x: self.batch_x, y: self.batch_y})
    train_set_iterator = DatasetMiniBatchIterator(self.rng, x_train, y_train,
                                                  batch_size=batch_size,
                                                  randomize=True)
    dev_set_iterator = DatasetMiniBatchIterator(self.rng, x_dev, y_dev,
                                                batch_size=batch_size,
                                                randomize=False)
    train_score = self._batch_score(train_set_iterator)
    dev_score = self._batch_score(dev_set_iterator)

    best_dev_error = numpy.inf
    epoch = 0
    timer_train = time.time()
    while epoch < n_epochs:
        avg_costs = []
        timer = time.time()
        fish = ProgressFish(total=len(train_set_iterator))
        for i, (x, y) in enumerate(train_set_iterator, 1):
            fish.animate(amount=i)
            avg_cost = train_fn(x, y)
            if type(avg_cost) == list:
                avg_costs.append(avg_cost[0])
            else:
                avg_costs.append(avg_cost)
        mean_cost = numpy.mean(avg_costs)
        mean_train_error = numpy.mean(train_score())
        dev_error = numpy.mean(dev_score())
        print('epoch {} took {:.4f} seconds; '
              'avg costs: {:.4f}; train error: {:.4f}; '
              'dev error: {:.4f}'.format(epoch, time.time() - timer,
                                         mean_cost, mean_train_error,
                                         dev_error))
        if dev_error < best_dev_error:
            best_dev_error = dev_error
            best_params = [numpy.copy(p.get_value()) for p in params]
        epoch += 1
    print('Training took: {:.4f} seconds'.format(time.time() - timer_train))
    for i, param in enumerate(best_params):
        params[i].set_value(param, borrow=True)
def get_vector_list(refs, refs_base):
    fish = ProgressFish(total=len(refs_base))
    vector_list = ref_to_vector(refs_base[0], refs)  # init
    for i in range(1, len(refs_base)):
        column_vector = ref_to_vector(refs_base[i], refs)
        vector_list = column_stack([vector_list, column_vector])
        fish.animate(amount=i)
    return vector_list
def do_records(records):
    num_records = LIMIT
    fish = ProgressFish(total=num_records)
    for i, record in enumerate(records):
        fish.animate(amount=i)
        keys = record.keys()
        x = get_o_list(record, x_keys)
        y = get_o_list(record, y_keys)
        y_count = len(filter((lambda r: r != "\"\"" and r != "''" and r != ""), y))
        x_count = len(filter((lambda r: r != "\"\"" and r != "''" and r != ""), x))
        if y_count == 0 or x_count == 0:
            continue
        home_dep_details = [""] * 16
        try:
            home_lsoa_code = record['"CEN_LSOA"'][1:-1]
            if home_lsoa_code != None and home_lsoa_code != '':
                home_dep_details = dep_2_mappings.find_one(
                    {'code': home_lsoa_code})['dep']
        except Exception as e:
            print "home - probably wales/scotland"
            print e
        x += home_dep_details
        sd = [""] * 133
        try:
            sd2 = schools_data.find_one(
                {'KS5_11SCHNAME': record["\"SCH_SCHOOLNAME\""][1:-1]}, {'_id': 0})
            if sd2 != None:
                sd = map((lambda k: sd2[k]), [
                    "LURN", "LLA", "LESTAB", "LLAESTAB", "LSCHNAME", "LSTREET",
                    "LLOCALITY", "LADDRESS3", "LTOWN", "LPOSTCODE", "LTELNUM",
                    "LICLOSE", "LISNEW", "LMINORGROUP", "LNFTYPE", "LISPRIMARY",
                    "LISSECONDARY", "LISPOST16", "LAGEL", "LAGEH", "LGENDER",
                    "LSFGENDER", "LRELDENOM", "LADMPOL", "LNEWACFLAG",
                    "KS5_11RECTYPE", "KS5_11ALPHAIND", "KS5_11REGION",
                    "KS5_11LASORT", "KS5_11LEA", "KS5_11ESTAB", "KS5_11URN",
                    "KS5_11SCHNAME_AC", "KS5_11SCHNAME", "KS5_11ADDRESS1",
                    "KS5_11ADDRESS2", "KS5_11ADDRESS3", "KS5_11TOWN",
                    "KS5_11PCODE", "KS5_11TELNUM", "KS5_11CONTFLAG",
                    "KS5_11NFTYPE", "KS5_11RELDENOM", "KS5_11ADMPOL",
                    "KS5_11GENDER1618", "KS5_11FEEDER", "KS5_11AGERANGE",
                    "KS5_11ICLOSE", "KS5_11TABKS2", "KS5_11TAB15",
                    "KS5_11EXAMCONF", "KS5_11DUMMY1", "KS5_11TPUP1618",
                    "KS5_11TALLPUPA", "KS5_11TALLPPSA", "KS5_11TALLPPEA",
                    "KS5_11PTPASS1L3", "KS5_11PTPASS2LV3", "KS5_11PTPASS3LV3",
                    "KS5_11TALLPPS08", "KS5_11TALLPPS09", "KS5_11TALLPPS10",
                    "KS5_11TALLPPE08", "KS5_11TALLPPE09", "KS5_11TALLPPE10",
                    "ABS_11LA", "ABS_11ESTAB", "ABS_11URN", "ABS_11PERCTOT",
                    "ABS_11PERCUA", "ABS_11PPERSABS15", "ABS_11PPERSABS20",
                    "CFR_11URN", "CFR_11LANUMBER", "CFR_11LONDON/NON-LONDON",
                    "CFR_11MEDIAN", "CFR_11PUPILS", "CFR_11FSM", "CFR_11FSMBAND",
                    "CFR_11GRANTFUNDING", "CFR_11SELFGENINCOME",
                    "CFR_11TOTALINCOME", "CFR_11TEACHINGSTAFF",
                    "CFR_11SUPPLYTEACHERS", "CFR_11EDUCATIONSUPPORTSTAFF",
                    "CFR_11PREMISES", "CFR_11BACKOFFICE", "CFR_11CATERING",
                    "CFR_11OTHERSTAFF", "CFR_11ENERGY",
                    "CFR_11LEARNINGRESOURCES", "CFR_11ICT", "CFR_11BOUGHTIN",
                    "CFR_11OTHER", "CFR_11TOTALEXPENDITURE", "SWF_11LA",
                    "SWF_11URN", "SWF_11NTEA", "SWF_11NTEAAS", "SWF_11NNONTEA",
                    "SWF_11NFTETEA", "SWF_11NFTETEAAS", "SWF_11RATPUPTEA",
                    "SWF_11SALARY", "CENSUS_11URN", "CENSUS_11LAESTAB",
                    "CENSUS_11NUMFTE", "CENSUS_11TOTPUPSENDN",
                    "CENSUS_11TSENSAP", "CENSUS_11TSENA", "CENSUS_11TOTSENST",
                    "CENSUS_11TOTSENAP", "CENSUS_11PSENSAP", "CENSUS_11PSENA",
                    "CENSUS_11PTOTSENST", "CENSUS_11PTOTSENAP",
                    "CENSUS_11TOTPUPEALDN", "CENSUS_11NUMEAL",
                    "CENSUS_11NUMENGFL", "CENSUS_11NUMUNCFL",
                    "CENSUS_11PNUMEAL", "CENSUS_11PNUMENGFL",
                    "CENSUS_11PNUMUNCFL", "CENSUS_11TOTPUPFSMDN",
                    "CENSUS_11NUMFSM", "CENSUS_11NUMNOFSM", "CENSUS_11PNUMFSM",
                    "CENSUS_11PNUMNOFSM", "OLA", "OURN", "OSCHOOLNAME",
                    "OPHASE", "OREPORTURL"])
        except Exception as e:
            print "school details"
            print e
        x += sd
        school_dep_details = [""] * 16
        try:
            school_postcode = record["\"SCH_POSTCODE\""][1:-1]
            school_lsoa_code = postcodes.find_one({'Postcode2': school_postcode},
                                                  {'Code': 1})
            if school_lsoa_code != None:
                school_lsoa_code = school_lsoa_code['Code']
                school_dep_details = dep_2_mappings.find_one(
                    {'code': school_lsoa_code})['dep']
        except Exception as e:
            print "school deps"
            print e
        x += school_dep_details
        coll.insert({'x': x, 'y': y})
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('in_abs',
                        help='input abstracts file path: "../*_abstracts.pickle"')
    parser.add_argument('out_stemmed_abs',
                        default='../stemmed_abstracts.pickle',
                        help='file path of abstracts output file: "stemmed_abstracts.pickle"')
    parser.add_argument('out_words',
                        default='../word_base.pickle',
                        help='file path of words output file: "word_base.pickle"')
    args = parser.parse_args()

    print "loading abstracts..."
    abstracts_file = open(args.in_abs)
    abstracts = cPickle.load(abstracts_file)
    abstracts_file.close()

    words = []
    stemmed_abstracts = {}
    fish = ProgressFish(total=len(abstracts))
    cnt = 0
    print "reading all words..."
    for (key, abstract) in abstracts.items():
        sentence = wordpunct_tokenize(abstract.lower())
        new_sentence = []
        for word in sentence:
            if word.isalnum():
                stemmed_word = stem(word)
                words.append(stemmed_word)
                new_sentence.append(stemmed_word)
        stemmed_abstracts[key] = list(set(new_sentence))
        cnt += 1
        fish.animate(amount=cnt)

    print "removing duplicates"
    words = set(words)

    print "persisting word_base"
    word_base = open(args.out_words, 'w')
    cPickle.dump(words, word_base)
    word_base.close()

    print "persisting abstracts"
    stemmed_abstracts_file = open(args.out_stemmed_abs, 'w')
    cPickle.dump(stemmed_abstracts, stemmed_abstracts_file)
    stemmed_abstracts_file.close()
def audit_scores(ss_id=None):
    """ recalculates all scores and displays the changes """
    reload(sys)
    sys.setdefaultencoding('utf8')

    if not ss_id:
        print "iterating all submission sets"
        cs = CreditSet.objects.get(pk=6)
        qs = SubmissionSet.objects.filter(status='r').filter(creditset=cs)
    else:
        print "auditing SS: %s" % ss_id
        qs = [SubmissionSet.objects.get(pk=ss_id)]

    display_table = []
    fish = ProgressFish(total=len(qs))
    count = 0
    for ss in qs:
        count += 1
        fish.animate(amount=count)

        # current_score = get_score_object(ss)
        # recalculate_all_scores(ss)
        # recalculated_score = get_score_object(ss)

        # compare scores
        s1 = get_score_obj(ss, credits=False)
        ss.get_STARS_score(recalculate=True)
        s2 = get_score_obj(ss, credits=False)
        compare_score_objects(s1, s2, display_table)

        # current_score = round(ss.score, 2)
        # recalculated_score = round(ss.get_STARS_score(recalculate=True), 2)
        #
        # if abs(current_score - recalculated_score) > .1:
        #     display_table.append([
        #         ss, current_score, recalculated_score,
        #         current_score - recalculated_score, ss.date_submitted, ss.id])

    # if display_table:
    print tabulate(display_table, headers=[
        'submission set', 'name', 'id', 'calculated_score',
        'recalculated_score', 'delta'])
def abstracts_to_vector(abstracts, word_base):
    print 'converting abstracts...'
    cnt = 0
    word_base_dict = {}
    for word in word_base:
        word_base_dict[word] = cnt
        cnt += 1
    cnt = 0
    fish = ProgressFish(total=len(abstracts))
    for key, abstract in abstracts.items():
        vector_abstract = abstract_to_vector(abstract, word_base_dict)
        abstracts[key] = vector_abstract
        cnt += 1
        fish.animate(amount=cnt)
    return abstracts
def convert_to_list(abstracts, references):
    no_of_docs = len(abstracts)
    if len(references) != no_of_docs:
        print 'abstracts and refs must have same size!'
    abs_list = list()
    refs_list = list()
    keys_list = list()
    print len(references)
    cnt = 1
    fish = ProgressFish(total=len(abstracts))
    for key in references.keys():
        fish.animate(amount=cnt)
        cnt += 1
        abs_list.append(abstracts[key])
        refs_list.append(references[key])
        keys_list.append(key)
    return abs_list, refs_list, keys_list
def download_url(url, save_as):
    r = requests.get(url, cookies={'_metacasts_session': SESSION_COOKIE},
                     stream=True)
    file_size = int(r.headers['content-length'])
    mime_type = r.headers['content-type']
    downloaded_bytes = 0
    file_name = save_as + guess_extension(mime_type)
    file_mode = 'wb'

    if os.path.exists(file_name):
        existing_size = os.path.getsize(file_name)
        print 'size is {0} vs {1}'.format(existing_size, file_size)
        if os.path.getsize(file_name) == file_size:
            print file_name + " already exists. Skipping..."
            return
        else:
            print "File incomplete. Resuming..."
            file_mode = 'ab'
            r = requests.get(url,
                             cookies={'_metacasts_session': SESSION_COOKIE},
                             headers={'Range': 'bytes={0}-'.format(existing_size)},
                             stream=True)
            downloaded_bytes = existing_size
            if int(r.headers['content-length']) != file_size - existing_size:
                print "File size mismatch. Reset download."
                os.remove(file_name)
                file_mode = 'wb'
                downloaded_bytes = 0

    with open(file_name, file_mode) as f:
        print "Downloading {0}...".format(file_name)
        pf = ProgressFish(total=file_size)
        for index, chunk in enumerate(r.iter_content(chunk_size=128 * 1024)):
            if chunk:
                downloaded_bytes += len(chunk)
                f.write(chunk)
                f.flush()
                pf.animate(amount=downloaded_bytes)
    print "{0} finished download".format(file_name)
def random_sampling(abstracts, references, no_of_entries):
    length = check_lengths(abstracts, references)
    percentage = float(no_of_entries) / length * 100
    print 'reduce ' + str(length) + ' to ' + str(no_of_entries)
    print "that's about " + str(percentage) + "% of the original size"
    fish = ProgressFish(total=int(no_of_entries))
    key_list = abstracts.keys()
    random.shuffle(key_list)
    new_abs = {}
    new_refs = {}
    for i in range(int(no_of_entries)):
        fish.animate(amount=i)
        choice = key_list.pop()
        new_abs[choice] = abstracts[choice]
        new_refs[choice] = references[choice]
    return new_abs, new_refs
def fetch(urls):
    now = get_datetime(gmtime())
    with futures.ThreadPoolExecutor(max_workers=5) as executor:
        future_to_url = {executor.submit(feedparser.parse, url): url
                         for url in urls}
        fish = ProgressFish(total=len(urls))
        i = 0
        for future in futures.as_completed(future_to_url):
            url = future_to_url[future]
            if future.exception() is not None:
                log('Error reading %r: %s' % (url, future.exception()))
                continue
            feed = future.result()
            _feed, created = Feed.objects.get_or_create(url=url)
            for entry in feed.entries:
                try:
                    pub_time = get_datetime(entry.date_parsed)
                except AttributeError:
                    log('No date_parsed attribute on entry')
                    continue
                except:
                    log('Error reading entry date')
                    continue
                try:
                    assert pub_time - now < TIME_EPSILON
                except AssertionError:
                    print >> sys.stderr, 'Entry is from future? %s %s' % (pub_time, url)
                _entry, created = Entry.objects.get_or_create(feed=_feed,
                                                              pub_time=pub_time)
                for item in get_items(entry):
                    Item.objects.get_or_create(value=item, entry=_entry)
            fish.animate(amount=i)
            i += 1
def main():
    source = open("../dense_abstracts.pickle")
    out_file = open('../english_abstracts.pickle', 'w')
    out_diff = open('../english_abstracts_diff.pickle', 'w')
    print str(datetime.now()) + " filterNonEnglishAbstracts.py - deletes entries with non-English abstracts"
    if not source:
        print "This works.... NOT"
        return -1

    deleted_abstract_ids = []
    print "reading abstracts..."
    abstracts = cPickle.load(source)
    abs_tobeginwith = float(len(abstracts))

    print "deleting non-English ones"
    empty_cnt = 0
    fish = ProgressFish(total=len(abstracts))
    count = 0
    for article_id, abstract in abstracts.items():
        count += 1
        fish.animate(amount=count)
        if 'en' != guessLanguage(abstract):
            empty_cnt += 1
            deleted_abstract_ids.append(article_id)
            del abstracts[article_id]

    print str(datetime.now()) + ' starting to persist references to: ' + out_file.name + ' and ' + out_diff.name
    print "deleted " + str(empty_cnt) + " documents"
    print "that's " + str(empty_cnt / abs_tobeginwith) + "%"
    cPickle.dump(abstracts, out_file, -1)
    cPickle.dump(deleted_abstract_ids, out_diff, -1)
    source.close()
    out_file.close()
    out_diff.close()
def most_refs_sampling(abstracts, references, no_of_entries):
    no_of_entries = int(no_of_entries)
    length = check_lengths(abstracts, references)
    percentage = float(no_of_entries) / length * 100
    print 'reduce ' + str(length) + ' to ' + str(no_of_entries)
    print "that's about " + str(percentage) + "% of the original size"
    fish = ProgressFish(total=int(no_of_entries))
    new_abs = {}
    new_refs = {}
    count_refs = {}
    for key, refs in references.items():
        count_refs[key] = len(refs)
    refs_sorted_by_count = sorted(count_refs.iteritems(),
                                  key=operator.itemgetter(1))
    for i in range(int(no_of_entries)):
        fish.animate(amount=i)
        (choice, count) = refs_sorted_by_count.pop()
        new_abs[choice] = abstracts[choice]
        new_refs[choice] = references[choice]
    return new_abs, new_refs
def wait_for_completed_transfer(mountpoint, timeout_in_s=None):
    print "waiting for completed upload"
    if timeout_in_s is not None:
        print "waiting at most %s min" % (timeout_in_s / 60)
    else:
        timeout_in_s = float("inf")
    CLOUDFUSION_NOT_UPLOADED_PATH = mountpoint + "/stats/notuploaded"
    time_waited = 0
    if os.path.exists(CLOUDFUSION_NOT_UPLOADED_PATH):
        if timeout_in_s == float("inf"):
            fsh = ProgressFish(total=10000000000)
        else:
            fsh = ProgressFish(total=timeout_in_s)
        while os.path.getsize(CLOUDFUSION_NOT_UPLOADED_PATH) > 0:
            sleep(10)
            time_waited += 10
            fsh.animate(amount=time_waited)
            if time_waited > timeout_in_s:
                break
        return
    print ""
    start = time.time()

    def no_network_activity(line):
        try:
            kbit_per_5min = sum(map(int, line.split()))
            if kbit_per_5min < 200:
                return True
        except ValueError:
            pass
        if start + timeout_in_s < time.time():
            return True
        return False

    # feed ifstat's output lines to the no_network_activity callback
    p = ifstat('-bzn', '600', _out=no_network_activity)
    p.wait()
    p.kill()
def handle(self, *args, **options):
    conn = boto.connect_s3()
    src = conn.get_bucket('aashe-hub-dev')
    dst = conn.get_bucket('aashe-hub-production')
    dst_keys = [k.key for k in dst.list()]

    print "Copying all Files..."
    file_qs = File.objects.filter(item__isnull=False)
    fish = ProgressFish(total=file_qs.count())
    count = 0
    for f in file_qs:
        count += 1
        fish.animate(amount=count)
        key = urlparse(f.item).path[1:]
        # if it doesn't already exist:
        if key not in dst_keys:
            dst.copy_key(key, src.name, key)

    print "Copying all Images..."
    image_qs = Image.objects.filter(image__isnull=False)
    fish2 = ProgressFish(total=image_qs.count())
    count = 0
    for i in image_qs:
        count += 1
        fish2.animate(amount=count)
        key = urlparse(i.image).path[1:]
        # if it doesn't already exist:
        if key not in dst_keys:
            try:
                dst.copy_key(key, src.name, key)
            except boto.exception.S3ResponseError:
                print "**** failed to copy: %s" % key
    print
def progress(iterable, **kwargs):
    fish = ProgressFish(**kwargs)
    for i, item in enumerate(iterable):
        yield item
        fish.animate(amount=i)
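# A minimal usage sketch for the progress() wrapper above, assuming ProgressFish
# accepts the total= keyword used elsewhere in these snippets; process_item and
# the item count are hypothetical:
#
#     for item in progress(range(100), total=100):
#         process_item(item)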
def main(config_path, desc_path, target_path):
    massaged = io.StringIO()
    with io.open(config_path, 'rU') as infile:
        massaged.writelines(line.lstrip() for line in infile)
    massaged.seek(0)
    config = RawConfigParser()
    config.readfp(massaged)
    lfs_url = config.get('lfs', 'url').strip('"')
    api_url = posixpath.join(lfs_url, 'objects', 'batch')

    with io.open(desc_path, 'rU') as infile:
        target = dict(line.strip().partition(' ')[::2] for line in infile)
    if target.get('version') != 'https://git-lfs.github.com/spec/v1':
        raise ValueError("can't handle lfs", target['version'])
    oid_type, sep, oid = target['oid'].partition(':')
    if oid_type != 'sha256':
        raise ValueError("can't handle oid", target['oid'])
    size = int(target['size'])

    sys.stderr.write('Fetching {!r} from lfs...\n'.format(
        os.path.basename(target_path)))
    try:
        infile = open(target_path, 'rb')
    except IOError as e:
        if e.errno != errno.ENOENT:
            raise
    else:
        if file_matches(infile, size, oid):
            sys.stderr.write('Lucky! It was already up to date.\n')
            return

    req = Request(api_url, json.dumps({
        'operation': 'download',
        'objects': [{
            'oid': oid,
            'size': size,
        }],
    }).encode(), {
        'Accept': JSON_TYPE,
        'Content-Type': JSON_TYPE,
    })
    with contextlib.closing(urlopen(req)) as respfile:
        if WRAP_RESPFILE:
            respfile = io.TextIOWrapper(respfile)
        resp = json.load(respfile)
    url = next(obj['actions']['download']['href']
               for obj in resp['objects'] if obj['oid'] == oid)

    with contextlib.closing(urlopen(url)) as respfile:
        hasher = hashlib.sha256()
        with tempfile.NamedTemporaryFile(
                dir=os.path.dirname(target_path)) as outfile:
            fish = ProgressFish(total=size)
            fetched = 0
            for chunk in iter(lambda: respfile.read(8192), b''):
                fetched += len(chunk)
                fish.animate(amount=fetched)
                hasher.update(chunk)
                outfile.write(chunk)
            if hasher.hexdigest() != oid:
                raise ValueError('hash failure', hasher.hexdigest(), oid)
            os.rename(outfile.name, target_path)
            # recreate the temp file so NamedTemporaryFile's cleanup can still unlink it
            open(outfile.name, 'w').close()
        else:
            chapter_data = {'name': c.name, 'description': c.description}
            chapters_to_check.append(chapter_data)
    else:
        pass

sites_with_links_back = []
sites_with_no_links_back = []
fish = ProgressFish(total=len(chapters_with_web_sites))
for i, c in enumerate(chapters_with_web_sites):
    req = c['urls'][0]
    try:
        r = br.open(req)
        fish.animate(amount=i)
    except urllib2.HTTPError, e:
        print e.code
        continue
    c['urls'] = r.geturl()
    doc = r.read()
    soup = BeautifulSoup.BeautifulSoup(doc)
    try:
        href = soup.findAll('a', {'href': re.compile('thezeitgeistmovement.com')})
        if href:
            sites_with_links_back.append(c)
        else:
            sites_with_no_links_back.append(c)
    except:
        continue
def analyze_long_pulse_data_file(filepath, save=0, plot_steps=0, new=1,
                                 starttime=0, endtime=0):
    """ analyzes timeseries of a pulse fish EOD recording """
    # Script to detect and classify EODs in recordings of weakly electric pulse
    # fish, Dexter Früh, 2018
    #
    # results will be saved in workingdirectory/recording/
    #
    # input:
    #   - [Recorded Timeseries] recording.WAV
    # outputs (optional):
    #   - [Detected and Classified EODs]
    #     (Numpy Array with Shape (Number of EODs, 4 (Attributes of EODs)),
    #     with the EOD-Attributes
    #       - x-location of the EOD
    #         (time/x-coordinate/datapoint in recording)
    #       - y-location of the EOD
    #         (Amplitude of the positive peak of the pulse-EOD)
    #       - height of the EOD (largest distance between peak and trough in the EOD)
    #       - class of the EOD
    #     eods_recording.npy
    #   - [plots of the results of each analysis step for each
    #     analysis part (time interval of length = deltat) of the recording]
    #
    # required command line arguments at function call
    #   - save : if True, save the results to a numpy file (possibly
    #            overwrite existing)
    #   - plot : if True, plot results in each analysis step
    #   - new  : if True, do a new analysis of the recording, even if there
    #            is an existing analyzed .npy file with the right name.

    import sys
    import numpy as np
    import copy
    from scipy.stats import gmean
    from scipy import stats
    from scipy import signal
    from scipy import optimize
    import matplotlib
    from fish import ProgressFish
    import matplotlib.pyplot as plt
    from thunderfish.dataloader import open_data
    from thunderfish.peakdetection import detect_peaks
    from scipy.interpolate import interp1d
    from scipy.signal import savgol_filter
    from collections import deque
    import ntpath
    import nixio as nix
    import time
    import os
    from shutil import copy2
    from ownDataStructures import Peak, Tr, Peaklist
    import DextersThunderfishAddition as dta
    from IPython import embed

    # parameters for the analysis
    deltat = 30.0  # seconds of buffer size
    thresh = 0.04  # minimal threshold for peakdetection
    peakwidth = 20  # width of a peak and minimal distance between two EODs
    # basic parameters for thunderfish.dataloader.open_data
    verbose = 0
    channel = 0
    ultimate_threshold = thresh + 0.01
    startblock = 0
    # timeinterval to analyze other than the whole recording
    #starttime = 0
    #endtime = 0
    #timegiven = 0
    home = os.path.expanduser('~')
    os.chdir(home)
    new = int(sys.argv[4])
    save = int(sys.argv[2])
    plot = int(sys.argv[3])
    starttime = int(starttime)
    endtime = int(endtime)
    timegiven = False
    if endtime > starttime >= 0:
        timegiven = True
    peaks = np.array([])
    troughs = np.array([])
    filename = path_leaf(filepath)
    datasavepath = filename[:-4]
    proceed = input(
        'Currently operates in home directory. If given a pulsefish recording '
        'filename.WAV, then a folder filename/ will be created in the home '
        'directory and all relevant files will be stored there. continue? [y/n] '
    ).lower()
    if proceed != 'y':
        quit()
    if not os.path.exists(datasavepath):
        os.makedirs(datasavepath)
    if save == 1:
        print('files will be saved to: ', datasavepath)
    eods_len = 0

    # starting analysis
    if new == 1 or not os.path.exists(filename[:-4] + "/eods5_" + filename[:-3] + "npy"):
        if filepath != home + '/' + datasavepath + '/' + filename:
            print(filepath, datasavepath + '/' + filename)
            proceed = input('Copy datafile to ' + datasavepath +
                            ' where all the other files will be stored? [y/n] ').lower()
            if proceed == 'y':
                copy2(filepath, datasavepath)
        # import data
        with open_data(filepath, channel, deltat, 0.0, verbose) as data:
            samplerate = data.samplerate
            nblock = int(deltat * data.samplerate)
            # selected time interval
            if timegiven == True:
                parttime1 = starttime * samplerate
                parttime2 = endtime * samplerate
                data = data[parttime1:parttime2]
            # split data into blocks
            if len(data) % nblock != 0:
                blockamount = len(data) // nblock + 1
            else:
                blockamount = len(data) // nblock
            # progress bar
            print('blockamount: ', blockamount)
            progress = 0
            print(progress, '%', flush=True, end=" ")
            fish = ProgressFish(total=blockamount)
            # blockwise analysis
            for idx in range(0, blockamount):
                blockdata = data[idx * nblock:(idx + 1) * nblock]
                # progressbar
                if progress < (idx * 100 // blockamount):
                    progress = (idx * 100) // blockamount
                progressstr = ' Filestatus: '
                fish.animate(amount=idx, dexextra=progressstr)
                # ---analysis------------------------------------------------
                # step 1: detect peaks in timeseries
                pk, tr = detect_peaks(blockdata, thresh)
                troughs = tr
                # continue with analysis only if multiple peaks are detected
                if len(pk) > 3:
                    peaks = dta.makeeventlist(pk, tr, blockdata, peakwidth)
                    #dta.plot_events_on_data(peaks, blockdata)
                    peakindices, peakx, peakh = dta.discardnearbyevents(
                        peaks[0], peaks[1], peakwidth)
                    peaks = peaks[:, peakindices]
                    if len(peaks) > 0:
                        # used to connect the results of the current block with the previous
                        if idx > startblock:
                            peaklist = dta.connect_blocks(peaklist)
                        else:
                            peaklist = Peaklist([])
                        aligned_snips = dta.cut_snippets(blockdata, peaks[0], 15,
                                                         int_met="cubic",
                                                         int_fact=10,
                                                         max_offset=1.5)
                        pcs = dta.pc(aligned_snips)  #pc_refactor(aligned_snips)
                        order = 5
                        minpeaks = 3 if deltat < 2 else 10
                        labels = dta.cluster_events(pcs, peaks, order, 0.4,
                                                    minpeaks, False,
                                                    method='DBSCAN')
                        peaks = np.append(peaks, [labels], axis=0)
                        #dta.plot_events_on_data(peaks, blockdata)
                        num = 1
                        if idx > startblock:
                            dta.alignclusterlabels(labels, peaklist, peaks,
                                                   data=blockdata)
                        peaks, peaklist = dta.ampwalkclassify3_refactor(
                            peaks, peaklist)  # classification by amplitude
                        minlen = 6  # >= 1
                        peaks = dta.discard_short_classes(peaks, minlen)
                        if len(peaks[0]) > 0:
                            peaks = dta.discard_wave_pulses(peaks, blockdata)
                        # plots the data part and its detected and classified peaks
                        if plot_steps == True:
                            dta.plot_events_on_data(peaks, blockdata)
                            pass
                        worldpeaks = np.copy(peaks)
                        # change peaks location in the buffered part to the location relative to the whole recording
                        peaklist.len = nblock
                        # peaklocations relative to whole recording
                        worldpeaks[0] = worldpeaks[0] + (idx * nblock)
                        thisblock_eods = np.delete(peaks, 3, 0)
                        # save the peaks of the current buffered part to a numpy-memmap on the disk
                        mmpname = "eods_" + filename[:-3] + "npmmp"
                        save_EOD_events_to_npmmp(thisblock_eods, eods_len,
                                                 idx == startblock,
                                                 datasavepath, mmpname)
                        eods_len += len(thisblock_eods[0])

        # after the last buffered part has finished, save the memory mapped
        # numpy file of the detected and classified EODs to a .npy file to the
        # disk
        eods = np.memmap(datasavepath + "/eods_" + filename[:-3] + "npmmp",
                         dtype='float64', mode='r+',
                         shape=(4, eods_len), order='F')
        if save == 1:
            path = datasavepath + "/"
            if not os.path.exists(path):
                os.makedirs(path)
            if eods_len > 0:
                print('Saved!')
                np.save(datasavepath + "/eods8_" + datasavepath + "npy", eods)
            else:
                #np.save(filename[:-4]+"/eods5_"+filename[:-3]+"npy", thisblock_eods)
                print('not saved')
    else:
        # if there already has been a certain existing result file and 'new' was set to False
        print('already analyzed')

    print('returns analyzed EODs. Calculate frequencies using all of these but '
          'discard the data from the EODs within the lowest few percent of amplitude')
    return eods
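# A hedged invocation sketch for analyze_long_pulse_data_file: note that the
# function also reads save/plot/new from sys.argv[2:5], so a hypothetical
# command line and call might look like
#
#     python analyze_long_pulse.py recording.WAV 1 0 1
#     # inside the script: analyze_long_pulse_data_file('recording.WAV', save=1, plot_steps=0, new=1)
#
# where the script name and recording name are made up for illustration.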
            'label_mm': mm_label,
        }
        pyithu_list.append(pyithu_json)
    post['pyithu'] = pyithu_list

    amyotha_list = []
    for amyotha_id in amyotha_ids:
        r = requests.get(base_url + '/en/posts/' + amyotha_id)
        en_label = r.json()['result']['label']
        r = requests.get(base_url + '/my/posts/' + amyotha_id)
        mm_label = r.json()['result']['label']
        amyotha_json = {
            'popit_id': amyotha_id,
            'label_en': en_label,
            'label_mm': mm_label,
        }
        amyotha_list.append(amyotha_json)
    post['amyotha'] = amyotha_list
    fish.animate(amount=progress)

json_out = json.dumps(posts, indent=4, sort_keys=True, ensure_ascii=False,
                      encoding='utf8')
with io.open('townships.json', 'w', encoding='utf8') as json_file:
    json_file.write(unicode(json_out))
    session = manager.mgr.getSessionObject(vbox)
    machine.lockMachine(session, 1)
    console = session.console
    usb_uuid = searchUSBBySerial(serial)
    console.attachUSBDevice(usb_uuid)
    manager.closeMachineSession(session)


def machineIpAddress(machine):
    return machine.getGuestProperty('/VirtualBox/GuestInfo/Net/0/V4/IP')[0]


if __name__ == '__main__':
    machine_uuid = str(uuid.uuid4())
    flags = 'UUID={0}'.format(machine_uuid)
    #machine = createMachine(None, machine_uuid, [], 'Debian', flags)
    machine = createMachine(machine_uuid, uuid=machine_uuid,
                            settings_file=None, groups=[],
                            os_type_id='Debian', flags=flags,
                            force_overwrite=False)
    fromMachine = searchMachine('987ae866-a4c0-4723-896e-8897fe17f3f0')
    progress = cloneMachine(fromMachine, machine, 1, [])
    while progress.operationPercent < 100:
        fish.animate(amount=progress.operationPercent)
    vbox.registerMachine(machine)
    progress, session = launchMachine(machine)
    while progress.operationPercent < 100:
        fish.animate(amount=progress.operationPercent)
    manager.closeMachineSession(session)
    time.sleep(60)
    attachUSBBySerial(machine, '8A000080Q')
    print machineIpAddress(machine)
        if type(in_map[k]) == int or type(in_map[k]) == float:
            in_dict[0].update({k: in_map[k]})
        if type(in_map[k]) == unicode:
            in_dict[0].update({k: in_map[k].encode('utf-8')})
        # if in_map.has_key('kind'):
        #     pdb.set_trace()
        if type(in_map[k]) == list:
            handle_lists(in_map[k], in_dict, k)


result = []
fish = ProgressFish(total=len(movie_ids))
for index, i in enumerate(movie_ids):
    # if index % 100 == 0:
    #     print "On movie number: ", index
    fish.animate(amount=index)
    m = im.get_movie(i)
    maps = {}
    for keys in m.iterkeys():
        # print keys, m[keys]
        maps.update({keys: m[keys]})
    dicts = [{}]
    if maps != {}:  ## To ensure that maps are filled, otherwise dicts is not defined!
        makerows(maps, dicts)
        # print dicts
        result.append(df1.append(dicts))
# print result
x = pandas.concat(result)
x.to_csv("results_5k.csv", encoding="utf-8")
ks51 = db.ks5_0910
ks52 = db.ks5_1011
mappings = db.postcode_lsoa
mappings.drop()


def get_p(di):
    return set(map((lambda r: r['"SCH_POSTCODE"']), di))


records = list(get_p(ks51.find({}, {'"SCH_POSTCODE"': 1}).limit(LIMIT)).intersection(
    get_p(ks52.find({}, {'"SCH_POSTCODE"': 1}).limit(LIMIT))))
postcodes = map((lambda r: r[1:-1].replace(' ', '')), records)
print len(postcodes)

i = 0
fish = ProgressFish(total=len(postcodes))
for postcode in postcodes:
    time.sleep(0.1)
    fish.animate(amount=i)
    try:
        i += 1
        url = "http://mapit.mysociety.org/postcode/%s" % postcode
        result = simplejson.load(urllib.urlopen(url))
        lsoa_code = filter((lambda area: area['type_name'] ==
                            "Lower Layer Super Output Area (Full)"),
                           result['areas'].values())[0]['name']
        mappings.insert({'lsoa_code': lsoa_code, 'postcode': postcode})
    except Exception as e:
        print "NOO"

mappings.create_index([("postcode", pymongo.ASCENDING)])
def analyze_pulse_data(filepath, absolutepath=True, deltat=30, thresh=0.04,
                       starttime=0, endtime=0, savepath=False, save=False,
                       npmmp=False, plot_steps=False, plot_result=False):
    '''
    analyzes timeseries of a pulse fish EOD recording

    Parameters
    ----------
    filepath: WAV-file with the recorded timeseries

    deltat: int, optional
        time for a single analysis block (recommended less than a minute, due to
        principal component clustering on the EOD-waveforms)

    thresh: float, optional
        minimum threshold for the peakdetection (if computing frequencies, recommended
        a tiny bit lower than the wished threshold; instead discard the EODs below the
        wished threshold after computing the frequencies for each EOD)

    starttime: int or str of int, optional
        time into the data from where to start the analysis, in seconds

    endtime: int or str of int, optional
        time into the data where to end the analysis, in seconds, larger than starttime

    savepath: Boolean or str, optional
        path to where to save results and intermediate results,
        only needed if save or npmmp is True:
        a string specifying a relative path to the directory where results and
        intermediate results will be saved,
        or False to use the preset savepath, which is ~/filepath/,
        or True to specify the savepath as input while the script is running

    save: Boolean, optional
        True to save the results into a npy file at the savepath

    npmmp: Boolean, optional
        True to save intermediate results into a npmmp at the savepath,
        only recommended in case of memory overflow

    plot_steps: Boolean, optional
        True to plot the results of each analysis block

    plot_result: Boolean, optional
        True to plot the results of the final analysis. Not recommended for long
        recordings due to %TODO

    Returns
    -------
    eods: numpy array
        2D numpy array. first axis: attributes of an EOD
        (x (datapoints), y (recorded voltage), height (difference from maximum
        to minimum), class), second axis: EODs in chronological order.
    '''
    import sys
    import numpy as np
    import copy
    from scipy.stats import gmean
    from scipy import stats
    from scipy import signal
    from scipy import optimize
    import matplotlib
    from fish import ProgressFish
    import matplotlib.pyplot as plt
    from thunderfish.dataloader import open_data
    from thunderfish.peakdetection import detect_peaks
    from scipy.interpolate import interp1d
    from scipy.signal import savgol_filter
    from collections import deque
    import ntpath
    import nixio as nix
    import time
    import os
    from shutil import copy2
    from ownDataStructures import Peak, Tr, Peaklist
    import DextersThunderfishAddition as dta
    from IPython import embed

    # parameters for the analysis
    thresh = 0.04  # minimal threshold for peakdetection
    peakwidth = 20  # width of a peak and minimal distance between two EODs
    # basic parameters for thunderfish.dataloader.open_data
    verbose = 0
    channel = 0
    ultimate_threshold = thresh + 0.01
    startblock = 0
    # timeinterval to analyze other than the whole recording
    #starttime = 0
    #endtime = 0
    #timegiven = 0
    home = os.path.expanduser('~')
    if absolutepath:
        filepath = home + '/' + filepath
    #os.chdir(home)
    #save = int(save)
    #plot_steps = int(plot_steps)
    starttime = int(starttime)
    endtime = int(endtime)
    timegiven = False
    if endtime > starttime >= 0:
        timegiven = True
    peaks = np.array([])
    troughs = np.array([])
    filename = path_leaf(filepath)
    eods_len = 0
    if savepath == False:
        datasavepath = home + '/' + filename[:-4]
    elif savepath == True:
        datasavepath = input(
            'With the option npmmp enabled, a numpy memmap will be saved to: ').lower()
    else:
        datasavepath = savepath

    if save and (os.path.exists(datasavepath + "/eods8_" + filename[:-3] + "npy")
                 or os.path.exists(datasavepath + "/eods5_" + filename[:-3] + "npy")):
        print('there already exists an analyzed file, aborting. Change the code if you don\'t want to abort')
        quit()
    if npmmp:
        #proceed = input('With the option npmmp enabled, a numpy memmap will be saved to ' + datasavepath + '. continue? [y/n] ').lower()
        proceed = 'y'
        if proceed != 'y':
            quit()

    # starting analysis
    with open_data(filepath, channel, deltat, 0.0, verbose) as data:
        samplerate = data.samplerate
        # selected time interval
        if timegiven == True:
            parttime1 = starttime * samplerate
            parttime2 = endtime * samplerate
            data = data[parttime1:parttime2]
        # split data into blocks
        nblock = int(deltat * samplerate)
        if len(data) % nblock != 0:
            blockamount = len(data) // nblock + 1
        else:
            blockamount = len(data) // nblock
        print('blockamount: ', blockamount)
        progress = 0
        print(progress, '%', flush=True, end=" ")
        fish = ProgressFish(total=blockamount)
        for idx in range(0, blockamount):
            blockdata = data[idx * nblock:(idx + 1) * nblock]
            if progress < (idx * 100 // blockamount):
                progress = (idx * 100) // blockamount
            progressstr = ' Filestatus: '
            fish.animate(amount=idx, dexextra=progressstr)
            pk, tr = detect_peaks(blockdata, thresh)
            troughs = tr
            if len(pk) > 3:
                peaks = dta.makeeventlist(pk, tr, blockdata, peakwidth)
                peakindices, peakx, peakh = dta.discardnearbyevents(
                    peaks[0], peaks[1], peakwidth)
                peaks = peaks[:, peakindices]
                if len(peaks) > 0:
                    if idx > startblock:
                        peaklist = dta.connect_blocks(peaklist)
                    else:
                        peaklist = Peaklist([])
                    aligned_snips = dta.cut_snippets(blockdata, peaks[0], 15,
                                                     int_met="cubic",
                                                     int_fact=10,
                                                     max_offset=1.5)
                    pcs = dta.pc(aligned_snips)  #pc_refactor(aligned_snips)
                    order = 5
                    minpeaks = 3 if deltat < 2 else 10
                    labels = dta.cluster_events(pcs, peaks, order, 0.4, minpeaks,
                                                False, method='DBSCAN')
                    peaks = np.append(peaks, [labels], axis=0)
                    #dta.plot_events_on_data(peaks, blockdata)
                    num = 1
                    if idx > startblock:
                        dta.alignclusterlabels(labels, peaklist, peaks,
                                               data=blockdata)
                    peaks, peaklist = dta.ampwalkclassify3_refactor(
                        peaks, peaklist)  # classification by amplitude
                    minlen = 6
                    peaks = dta.discard_short_classes(peaks, minlen)
                    if len(peaks[0]) > 0:
                        peaks = dta.discard_wave_pulses(peaks, blockdata)
                    if plot_steps == True:
                        dta.plot_events_on_data(peaks, blockdata)
                        pass
                    peaklist.len = nblock
                    worldpeaks = np.copy(peaks)
                    worldpeaks[0] = worldpeaks[0] + (idx * nblock)
                    thisblock_eods = np.delete(worldpeaks, 3, 0)
                    if npmmp:
                        if idx == startblock:
                            if not os.path.exists(datasavepath):
                                os.makedirs(datasavepath)
                            mmpname = "eods_" + filename[:-3] + "npmmp"
                        # save the peaks of the current buffered part to a numpy-memmap on the disk
                        save_EOD_events_to_npmmp(thisblock_eods, eods_len,
                                                 idx == startblock,
                                                 datasavepath, mmpname)
                        eods_len += len(thisblock_eods[0])
                    else:
                        if idx > 0:
                            all_eods = np.concatenate((all_eods, thisblock_eods),
                                                      axis=1)
                        else:
                            all_eods = thisblock_eods
    #dta.plot_events_on_data(all_eods, data)
    print('returns analyzed EODs. Calculate frequencies using all of these but '
          'discard the data from the EODs within the lowest few percent of amplitude')
    if npmmp:
        all_eods = np.memmap(datasavepath + '/' + mmpname, dtype='float64',
                             mode='r+', shape=(4, eods_len), order='F')
    if save == 1:
        path = filename[:-4] + "/"
        if not os.path.exists(path):
            os.makedirs(path)
        if eods_len > 0:
            np.save(datasavepath + "/eods8_" + filename[:-3] + "npy", all_eods)
            print('Saved!')
        else:
            print('not saved')
    return all_eods
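# A minimal usage sketch for analyze_pulse_data, with a hypothetical recording
# name; the returned array carries the EOD attributes described in the
# docstring along its first axis:
#
#     eods = analyze_pulse_data('recording.WAV', deltat=30, thresh=0.04,
#                               save=False, npmmp=False, plot_steps=False)
#     print(eods.shape)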