def update_stats(verbose=True):
    """Recompute and store summary statistics for the elliptic curves
    over Q collection.

    Records the conductor range and total curve count in the
    ``curves.stats`` collection, then delegates per-attribute and joint
    counts (rank, torsion, torsion structure, sha) to the ``data_mgt``
    rewrite helpers.

    NOTE(review): relies on module-level globals ``curves`` (the curves
    collection) and ``C`` (the database connection) -- confirm against
    the importing module.
    """
    if verbose:
        print("Finding max and min conductor and total number of curves")
    Nlist = curves.distinct('conductor')
    Nmax = int(max(Nlist))
    Nmin = int(min(Nlist))
    Ncurves = int(curves.count())
    if verbose:
        print("{} curves of conductor from {} to {}".format(Ncurves, Nmin, Nmax))
    # Upsert rather than insert_one: a plain insert with a fixed _id
    # raises DuplicateKeyError whenever the stats document already
    # exists, i.e. on every rerun of this function.
    curves.stats.replace_one(
        {'_id': 'conductor'},
        {'_id': 'conductor', 'min': Nmin, 'max': Nmax, 'total': Ncurves},
        upsert=True)

    from data_mgt.utilities.rewrite import (update_attribute_stats,
                                            update_joint_attribute_stats)

    # Basic counts for these attributes:
    ec = C.elliptic_curves
    if verbose:
        print("Adding simple counts for rank, torsion, torsion structure and Sha")
    update_attribute_stats(ec, 'curves',
                           ['rank', 'torsion', 'torsion_structure', 'sha'])

    # rank counts for isogeny classes (curve number 1 in each class):
    if verbose:
        print("Adding isogeny class rank counts")
    update_attribute_stats(ec, 'curves', 'rank', prefix='class',
                           filter={'number': 1})

    # torsion order by rank:
    if verbose:
        print("Adding torsion counts by rank")
    update_joint_attribute_stats(ec, 'curves', ['rank', 'torsion'],
                                 prefix='byrank', unflatten=True)

    # torsion structure by rank:
    if verbose:
        print("Adding torsion structure counts by rank")
    update_joint_attribute_stats(ec, 'curves', ['rank', 'torsion_structure'],
                                 prefix='byrank', unflatten=True)

    # sha by rank:
    if verbose:
        print("Adding sha counts by rank")
    update_joint_attribute_stats(ec, 'curves', ['rank', 'sha'],
                                 prefix='byrank', unflatten=True)
def update_stats(verbose=True):
    """Recompute and store summary statistics for the elliptic curves
    over Q collection.

    Records the conductor range and total curve count in the
    ``curves.stats`` collection, then delegates per-attribute and joint
    counts (rank, torsion, torsion structure, sha) to the ``data_mgt``
    rewrite helpers.

    NOTE(review): relies on module-level globals ``curves`` (the curves
    collection) and ``C`` (the database connection) -- confirm against
    the importing module.
    """
    if verbose:
        print("Finding max and min conductor and total number of curves")
    Nlist = curves.distinct('conductor')
    Nmax = int(max(Nlist))
    Nmin = int(min(Nlist))
    Ncurves = int(curves.count())
    if verbose:
        print("{} curves of conductor from {} to {}".format(
            Ncurves, Nmin, Nmax))
    # Upsert rather than insert_one: a plain insert with a fixed _id
    # raises DuplicateKeyError whenever the stats document already
    # exists, i.e. on every rerun of this function.
    curves.stats.replace_one(
        {'_id': 'conductor'},
        {'_id': 'conductor', 'min': Nmin, 'max': Nmax, 'total': Ncurves},
        upsert=True)

    from data_mgt.utilities.rewrite import (update_attribute_stats,
                                            update_joint_attribute_stats)

    # Basic counts for these attributes:
    ec = C.elliptic_curves
    if verbose:
        print(
            "Adding simple counts for rank, torsion, torsion structure and Sha"
        )
    update_attribute_stats(ec, 'curves',
                           ['rank', 'torsion', 'torsion_structure', 'sha'])

    # rank counts for isogeny classes (curve number 1 in each class):
    if verbose:
        print("Adding isogeny class rank counts")
    update_attribute_stats(ec, 'curves', 'rank', prefix='class',
                           filter={'number': 1})

    # torsion order by rank:
    if verbose:
        print("Adding torsion counts by rank")
    update_joint_attribute_stats(ec, 'curves', ['rank', 'torsion'],
                                 prefix='byrank', unflatten=True)

    # torsion structure by rank:
    if verbose:
        print("Adding torsion structure counts by rank")
    update_joint_attribute_stats(ec, 'curves', ['rank', 'torsion_structure'],
                                 prefix='byrank', unflatten=True)

    # sha by rank:
    if verbose:
        print("Adding sha counts by rank")
    update_joint_attribute_stats(ec, 'curves', ['rank', 'sha'],
                                 prefix='byrank', unflatten=True)
'_id' : doc_id, 'distinct' : unique_counts, } db[coll + '.stats'].replace_one({'_id':doc_id}, doc, upsert=True) # Get database connection C = getDBConnection() db = C.curve_automorphisms ############################ # Collect count statistics # ############################ print("Collecting statistics on genus and dim attributes...") # update_attribute_stats(db, 'passports', ['genus', 'dim', 'passport_label', 'total_label']) update_attribute_stats(db, 'passports', ['genus']) update_attribute_stats(db, 'passports', ['dim']) # Count unique number of entires print("Counting number of unique entries for passport_label and total_label attributes...") update_unique_count(db, 'passports', 'passport_label') update_unique_count(db, 'passports', 'total_label') ############################ # Collect joint statistics # ############################ print("Collecting statistics on unique families, refined passports, and generating vectors per genus.") # Unique groups per genus update_joint_attribute_stats (db, 'passports', ['genus','group'], prefix='bygenus', unflatten=True) # TODO the group stats already provides this info, although it requires parsing the group string
def update_stats(verbose=True):
    """Recompute all summary statistics for the elliptic curves over
    number fields ('nfcurves') collection and store them in its stats
    collection.

    Computes: signatures by degree, fields by signature/degree,
    per-attribute counts, and conductor-norm ranges (overall and broken
    down by degree, signature and field).

    NOTE(review): relies on module-level globals ``C`` (database
    connection) and ``nfcurves`` (the curves collection) -- confirm
    against the importing module.
    """
    from data_mgt.utilities.rewrite import update_attribute_stats
    from bson.code import Code
    ec = C.elliptic_curves
    ecdbstats = ec.nfcurves.stats

    # get list of degrees
    degrees = nfcurves.distinct('degree')
    if verbose:
        print("degrees: {}".format(degrees))

    # get list of signatures for each degree.  Note that it would not
    # work to use nfcurves.find({'degree':d}).distinct('signature')
    # since 'signature' is currently a list of integers and mongo would
    # return a list of integers, not a list of lists.  With hindsight
    # it would have been better to store the signature as a string.
    if verbose:
        print("Adding signatures_by_degree")
    # Map/reduce: emit each curve's signature coerced to a string key
    # with count 1 and sum the counts; only the distinct keys (the
    # '_id' values) are kept below.
    reducer = Code("""function(key,values){return Array.sum(values);}""")
    attr = 'signature'
    mapper = Code("""function(){emit(""+this.""" + attr + """,1);}""")
    sigs_by_deg = {}
    for d in degrees:
        sigs_by_deg[str(d)] = [
            r['_id'] for r in nfcurves.inline_map_reduce(
                mapper, reducer, query={'degree': d})
        ]
        if verbose:
            print("degree {} has signatures {}".format(d,
                                                       sigs_by_deg[str(d)]))
    # Replace the stored document wholesale (delete then re-insert).
    entry = {'_id': 'signatures_by_degree'}
    ecdbstats.delete_one(entry)
    entry.update(sigs_by_deg)
    ecdbstats.insert_one(entry)

    # get list of fields for each signature.  Simple code here faster
    # than map/reduce
    if verbose:
        print("Adding fields_by_signature")
    from sage.misc.flatten import flatten
    sigs = flatten(sigs_by_deg.values())
    # Signatures are "r,s" strings here; convert back to the stored
    # list-of-ints form for querying.
    fields_by_sig = dict([
        sig, nfcurves.find({
            'signature': [int(x) for x in sig.split(",")]
        }).distinct('field_label')
    ] for sig in sigs)
    entry = {'_id': 'fields_by_signature'}
    ecdbstats.delete_one(entry)
    entry.update(fields_by_sig)
    ecdbstats.insert_one(entry)

    # get list of fields for each degree
    if verbose:
        print("Adding fields_by_degree")
    fields_by_deg = dict(
        [str(d), sorted(nfcurves.find({
            'degree': d
        }).distinct('field_label'))] for d in degrees)
    entry = {'_id': 'fields_by_degree'}
    ecdbstats.delete_one(entry)
    entry.update(fields_by_deg)
    ecdbstats.insert_one(entry)

    fields = flatten(fields_by_deg.values())
    if verbose:
        print("{} fields, {} signatures, {} degrees".format(
            len(fields), len(sigs), len(degrees)))

    # Per-attribute counts over the whole collection.
    if verbose:
        print("Adding curve counts for torsion order, torsion structure")
    update_attribute_stats(ec, 'nfcurves',
                           ['torsion_order', 'torsion_structure'])
    if verbose:
        print("Adding curve counts by degree, signature and field")
    update_attribute_stats(ec, 'nfcurves',
                           ['degree', 'signature', 'field_label'])
    # Class counts: restricting to curve number 1 counts one curve per
    # isogeny class.
    if verbose:
        print("Adding class counts by degree, signature and field")
    update_attribute_stats(ec, 'nfcurves',
                           ['degree', 'signature', 'field_label'],
                           prefix="classes",
                           filter={'number': int(1)})

    # conductor norm ranges:
    # total:
    if verbose:
        print("Adding curve and class counts and conductor range")
    norms = ec.nfcurves.distinct('conductor_norm')
    data = {
        'ncurves': ec.nfcurves.count(),
        'nclasses': ec.nfcurves.find({
            'number': 1
        }).count(),
        'min_norm': min(norms),
        'max_norm': max(norms),
    }
    entry = {'_id': 'conductor_norm'}
    ecdbstats.delete_one(entry)
    entry.update(data)
    ecdbstats.insert_one(entry)

    # by degree:
    if verbose:
        print("Adding curve and class counts and conductor range, by degree")
    degree_data = {}
    for d in degrees:
        query = {'degree': d}
        res = nfcurves.find(query)
        ncurves = res.count()
        Ns = res.distinct('conductor_norm')
        min_norm = min(Ns)
        max_norm = max(Ns)
        # Adding 'number': 1 restricts the count to one curve per class.
        query['number'] = 1
        nclasses = nfcurves.count(query)
        degree_data[str(d)] = {
            'ncurves': ncurves,
            'nclasses': nclasses,
            'min_norm': min_norm,
            'max_norm': max_norm,
        }
    entry = {'_id': 'conductor_norm_by_degree'}
    ecdbstats.delete_one(entry)
    entry.update(degree_data)
    ecdbstats.insert_one(entry)

    # by signature:
    if verbose:
        print(
            "Adding curve and class counts and conductor range, by signature")
    sig_data = {}
    for sig in sigs:
        query = {'signature': [int(c) for c in sig.split(",")]}
        res = nfcurves.find(query)
        ncurves = res.count()
        Ns = res.distinct('conductor_norm')
        min_norm = min(Ns)
        max_norm = max(Ns)
        query['number'] = 1
        nclasses = nfcurves.count(query)
        sig_data[sig] = {
            'ncurves': ncurves,
            'nclasses': nclasses,
            'min_norm': min_norm,
            'max_norm': max_norm,
        }
    entry = {'_id': 'conductor_norm_by_signature'}
    ecdbstats.delete_one(entry)
    entry.update(sig_data)
    ecdbstats.insert_one(entry)

    # by field:
    if verbose:
        print("Adding curve and class counts and conductor range, by field")
    entry = {'_id': 'conductor_norm_by_field'}
    ecdbstats.delete_one(entry)
    field_data = {}
    for f in fields:
        ff = f.replace(".", ":")  # mongo does not allow "." in key strings
        query = {'field_label': f}
        res = nfcurves.find(query)
        ncurves = res.count()
        Ns = res.distinct('conductor_norm')
        min_norm = min(Ns)
        max_norm = max(Ns)
        query['number'] = 1
        nclasses = nfcurves.count(query)
        field_data[ff] = {
            'ncurves': ncurves,
            'nclasses': nclasses,
            'min_norm': min_norm,
            'max_norm': max_norm,
        }
    # NOTE(review): this _id was already deleted before the loop above;
    # the second delete_one here is redundant but harmless.
    entry = {'_id': 'conductor_norm_by_field'}
    ecdbstats.delete_one(entry)
    entry.update(field_data)
    ecdbstats.insert_one(entry)
def update_stats(verbose=True):
    """Refresh per-attribute statistics for the Lattices 'lat' collection.

    Delegates to ``update_attribute_stats`` (counts suppressed via
    ``nocounts=True``) for each tracked attribute in turn.
    """
    lat_db = getDBConnection().Lattices
    for attribute in ('class_number', 'dim', 'det'):
        update_attribute_stats(lat_db, 'lat', attribute, nocounts=True)
def make_stats():
    """Recompute summary statistics for the Hilbert modular forms
    database (fields and forms) and store them in the forms search
    stats collection.

    Computes: field counts by degree, fields listed by degree, form
    counts by degree/dimension, and level-norm ranges by degree and by
    field.

    NOTE(review): relies on the module-level global ``C`` (database
    connection) -- confirm against the importing module.
    """
    from data_mgt.utilities.rewrite import update_attribute_stats
    hmfs = C.hmfs
    form_stats = hmfs.forms.search.stats

    print("Updating fields stats")
    fields = hmfs.fields.distinct('label')
    degrees = hmfs.fields.distinct('degree')
    # Field labels have dot-separated components; the third one is used
    # as the sort key (presumably the discriminant -- see comment).
    field_sort_key = lambda F: int(F.split(".")[2])  # by discriminant
    fields_by_degree = dict([(d,
                              sorted(hmfs.fields.find({
                                  'degree': d
                              }).distinct('label'),
                                     key=field_sort_key))
                             for d in degrees])
    print("{} fields in database of degree from {} to {}".format(
        len(fields), min(degrees), max(degrees)))

    print("...summary of counts by degree...")
    # Replace the stored document wholesale (delete then re-insert).
    entry = {'_id': 'fields_summary'}
    form_stats.delete_one(entry)
    field_data = {
        'max': max(degrees),
        'min': min(degrees),
        'total': len(fields),
        'counts': [[d, hmfs.fields.count({'degree': d})] for d in degrees]
    }
    entry.update(field_data)
    form_stats.insert_one(entry)

    print("...fields by degree...")
    entry = {'_id': 'fields_by_degree'}
    form_stats.delete_one(entry)
    for d in degrees:
        entry[str(d)] = {
            'fields': fields_by_degree[d],
            'nfields': len(fields_by_degree[d]),
            'maxdisc': max(hmfs.fields.find({
                'degree': d
            }).distinct('discriminant'))
        }
    form_stats.insert_one(entry)

    print("Updating forms stats")
    print("counts by field degree and by dimension...")
    update_attribute_stats(hmfs, 'forms.search', 'deg')
    update_attribute_stats(hmfs, 'forms.search', 'dimension')

    print("counts by field degree and by level norm...")
    entry = {'_id': 'level_norm_by_degree'}
    degree_data = {}
    for d in degrees:
        res = hmfs.forms.search.find({'deg': d})
        nforms = res.count()
        Ns = res.distinct('level_norm')
        min_norm = min(Ns)
        max_norm = max(Ns)
        degree_data[str(d)] = {
            'nforms': nforms,
            'min_norm': min_norm,
            'max_norm': max_norm,
        }
        print("{}: {}".format(d, degree_data[str(d)]))
    form_stats.delete_one(entry)
    entry.update(degree_data)
    form_stats.insert_one(entry)

    print("counts by field and by level norm...")
    entry = {'_id': 'level_norm_by_field'}
    field_data = {}
    for f in fields:
        ff = f.replace(".", ":")  # mongo does not allow "." in key strings
        res = hmfs.forms.search.find({'field_label': f})
        nforms = res.count()
        Ns = res.distinct('level_norm')
        min_norm = min(Ns)
        max_norm = max(Ns)
        field_data[ff] = {
            'nforms': nforms,
            'min_norm': min_norm,
            'max_norm': max_norm,
        }
        #print("{}: {}".format(f,field_data[ff]))
    form_stats.delete_one(entry)
    entry.update(field_data)
    form_stats.insert_one(entry)
'_id': doc_id, 'distinct': unique_counts, } db[coll + '.stats'].replace_one({'_id': doc_id}, doc, upsert=True) # Get database connection C = getDBConnection() db = C.curve_automorphisms ############################ # Collect count statistics # ############################ print("Collecting statistics on genus and dim attributes...") # update_attribute_stats(db, 'passports', ['genus', 'dim', 'passport_label', 'total_label']) update_attribute_stats(db, 'passports', ['genus']) update_attribute_stats(db, 'passports', ['dim']) # Count unique number of entries print( "Counting number of unique entries for passport_label and total_label attributes..." ) update_unique_count(db, 'passports', 'passport_label') update_unique_count(db, 'passports', 'total_label') ############################ # Collect joint statistics # ############################ print( "Collecting statistics on unique families, refined passports, and generating vectors per genus."
def update_stats(verbose=True):
    """Recompute all summary statistics for the elliptic curves over
    number fields ('nfcurves') collection and store them in its stats
    collection.

    Computes: signatures by degree, fields by signature/degree,
    per-attribute counts, and conductor-norm ranges (overall and broken
    down by degree, signature and field).

    NOTE(review): relies on module-level globals ``C`` (database
    connection) and ``nfcurves`` (the curves collection) -- confirm
    against the importing module.
    """
    from data_mgt.utilities.rewrite import update_attribute_stats
    from bson.code import Code
    ec = C.elliptic_curves
    ecdbstats = ec.nfcurves.stats

    # get list of degrees
    degrees = nfcurves.distinct('degree')
    if verbose:
        print("degrees: {}".format(degrees))

    # get list of signatures for each degree.  Note that it would not
    # work to use nfcurves.find({'degree':d}).distinct('signature')
    # since 'signature' is currently a list of integers and mongo would
    # return a list of integers, not a list of lists.  With hindsight
    # it would have been better to store the signature as a string.
    if verbose:
        print("Adding signatures_by_degree")
    # Map/reduce: emit each curve's signature coerced to a string key
    # with count 1 and sum the counts; only the distinct keys (the
    # '_id' values) are kept below.
    reducer = Code("""function(key,values){return Array.sum(values);}""")
    attr = 'signature'
    mapper = Code("""function(){emit(""+this.""" + attr + """,1);}""")
    sigs_by_deg = {}
    for d in degrees:
        sigs_by_deg[str(d)] = [
            r['_id']
            for r in nfcurves.inline_map_reduce(mapper, reducer,
                                                query={'degree': d})
        ]
        if verbose:
            print("degree {} has signatures {}".format(d,
                                                       sigs_by_deg[str(d)]))
    # Replace the stored document wholesale (delete then re-insert).
    entry = {'_id': 'signatures_by_degree'}
    ecdbstats.delete_one(entry)
    entry.update(sigs_by_deg)
    ecdbstats.insert_one(entry)

    # get list of fields for each signature.  Simple code here faster
    # than map/reduce
    if verbose:
        print("Adding fields_by_signature")
    from sage.misc.flatten import flatten
    sigs = flatten(sigs_by_deg.values())
    # Signatures are "r,s" strings here; convert back to the stored
    # list-of-ints form for querying.
    fields_by_sig = dict(
        [sig, nfcurves.find({
            'signature': [int(x) for x in sig.split(",")]
        }).distinct('field_label')] for sig in sigs)
    entry = {'_id': 'fields_by_signature'}
    ecdbstats.delete_one(entry)
    entry.update(fields_by_sig)
    ecdbstats.insert_one(entry)

    # get list of fields for each degree
    if verbose:
        print("Adding fields_by_degree")
    fields_by_deg = dict(
        [str(d), sorted(nfcurves.find({
            'degree': d
        }).distinct('field_label'))] for d in degrees)
    entry = {'_id': 'fields_by_degree'}
    ecdbstats.delete_one(entry)
    entry.update(fields_by_deg)
    ecdbstats.insert_one(entry)

    fields = flatten(fields_by_deg.values())
    if verbose:
        print("{} fields, {} signatures, {} degrees".format(
            len(fields), len(sigs), len(degrees)))

    # Per-attribute counts over the whole collection.
    if verbose:
        print("Adding curve counts for torsion order, torsion structure")
    update_attribute_stats(ec, 'nfcurves',
                           ['torsion_order', 'torsion_structure'])
    if verbose:
        print("Adding curve counts by degree, signature and field")
    update_attribute_stats(ec, 'nfcurves',
                           ['degree', 'signature', 'field_label'])
    # Class counts: restricting to curve number 1 counts one curve per
    # isogeny class.
    if verbose:
        print("Adding class counts by degree, signature and field")
    update_attribute_stats(ec, 'nfcurves',
                           ['degree', 'signature', 'field_label'],
                           prefix="classes",
                           filter={'number': int(1)})

    # conductor norm ranges:
    # total:
    if verbose:
        print("Adding curve and class counts and conductor range")
    norms = ec.nfcurves.distinct('conductor_norm')
    data = {
        'ncurves': ec.nfcurves.count(),
        'nclasses': ec.nfcurves.find({
            'number': 1
        }).count(),
        'min_norm': min(norms),
        'max_norm': max(norms),
    }
    entry = {'_id': 'conductor_norm'}
    ecdbstats.delete_one(entry)
    entry.update(data)
    ecdbstats.insert_one(entry)

    # by degree:
    if verbose:
        print("Adding curve and class counts and conductor range, by degree")
    degree_data = {}
    for d in degrees:
        query = {'degree': d}
        res = nfcurves.find(query)
        ncurves = res.count()
        Ns = res.distinct('conductor_norm')
        min_norm = min(Ns)
        max_norm = max(Ns)
        # Adding 'number': 1 restricts the count to one curve per class.
        query['number'] = 1
        nclasses = nfcurves.count(query)
        degree_data[str(d)] = {
            'ncurves': ncurves,
            'nclasses': nclasses,
            'min_norm': min_norm,
            'max_norm': max_norm,
        }
    entry = {'_id': 'conductor_norm_by_degree'}
    ecdbstats.delete_one(entry)
    entry.update(degree_data)
    ecdbstats.insert_one(entry)

    # by signature:
    if verbose:
        print("Adding curve and class counts and conductor range, by signature")
    sig_data = {}
    for sig in sigs:
        query = {'signature': [int(c) for c in sig.split(",")]}
        res = nfcurves.find(query)
        ncurves = res.count()
        Ns = res.distinct('conductor_norm')
        min_norm = min(Ns)
        max_norm = max(Ns)
        query['number'] = 1
        nclasses = nfcurves.count(query)
        sig_data[sig] = {
            'ncurves': ncurves,
            'nclasses': nclasses,
            'min_norm': min_norm,
            'max_norm': max_norm,
        }
    entry = {'_id': 'conductor_norm_by_signature'}
    ecdbstats.delete_one(entry)
    entry.update(sig_data)
    ecdbstats.insert_one(entry)

    # by field:
    if verbose:
        print("Adding curve and class counts and conductor range, by field")
    entry = {'_id': 'conductor_norm_by_field'}
    ecdbstats.delete_one(entry)
    field_data = {}
    for f in fields:
        ff = f.replace(".", ":")  # mongo does not allow "." in key strings
        query = {'field_label': f}
        res = nfcurves.find(query)
        ncurves = res.count()
        Ns = res.distinct('conductor_norm')
        min_norm = min(Ns)
        max_norm = max(Ns)
        query['number'] = 1
        nclasses = nfcurves.count(query)
        field_data[ff] = {
            'ncurves': ncurves,
            'nclasses': nclasses,
            'min_norm': min_norm,
            'max_norm': max_norm,
        }
    # NOTE(review): this _id was already deleted before the loop above;
    # the second delete_one here is redundant but harmless.
    entry = {'_id': 'conductor_norm_by_field'}
    ecdbstats.delete_one(entry)
    entry.update(field_data)
    ecdbstats.insert_one(entry)
def make_stats():
    """Recompute summary statistics for the Hilbert modular forms
    database (fields and forms) and store them in the stats collection.

    Computes: field counts by degree, fields listed by degree, form
    counts by degree/dimension, and level-norm ranges by degree and by
    field.

    NOTE(review): relies on module-level globals ``hmf_fields``,
    ``hmf_forms``, ``hmf_stats`` (collections) and ``hmfs`` (the hmfs
    database) -- confirm against the importing module.
    """
    from data_mgt.utilities.rewrite import update_attribute_stats

    print("Updating fields stats")
    fields = hmf_fields.distinct('label')
    degrees = hmf_fields.distinct('degree')
    # Field labels have dot-separated components; the third one is used
    # as the sort key (presumably the discriminant -- see comment).
    field_sort_key = lambda F: int(F.split(".")[2])  # by discriminant
    fields_by_degree = dict([(d,
                              sorted(hmf_fields.find({
                                  'degree': d
                              }).distinct('label'),
                                     key=field_sort_key))
                             for d in degrees])
    print("{} fields in database of degree from {} to {}".format(
        len(fields), min(degrees), max(degrees)))

    print("...summary of counts by degree...")
    # Replace the stored document wholesale (delete then re-insert).
    entry = {'_id': 'fields_summary'}
    hmf_stats.delete_one(entry)
    field_data = {
        'max': max(degrees),
        'min': min(degrees),
        'total': len(fields),
        'counts': [[d, hmf_fields.count({'degree': d})] for d in degrees]
    }
    entry.update(field_data)
    hmf_stats.insert_one(entry)

    print("...fields by degree...")
    entry = {'_id': 'fields_by_degree'}
    hmf_stats.delete_one(entry)
    for d in degrees:
        entry[str(d)] = {
            'fields': fields_by_degree[d],
            'nfields': len(fields_by_degree[d]),
            'maxdisc': max(hmf_fields.find({
                'degree': d
            }).distinct('discriminant'))
        }
    hmf_stats.insert_one(entry)

    print("Updating forms stats")
    print("counts by field degree and by dimension...")
    update_attribute_stats(hmfs, 'forms', 'deg')
    update_attribute_stats(hmfs, 'forms', 'dimension')

    print("counts by field degree and by level norm...")
    entry = {'_id': 'level_norm_by_degree'}
    degree_data = {}
    for d in degrees:
        res = hmf_forms.find({'deg': d})
        nforms = res.count()
        Ns = res.distinct('level_norm')
        min_norm = min(Ns)
        max_norm = max(Ns)
        degree_data[str(d)] = {
            'nforms': nforms,
            'min_norm': min_norm,
            'max_norm': max_norm,
        }
        print("{}: {}".format(d, degree_data[str(d)]))
    hmf_stats.delete_one(entry)
    entry.update(degree_data)
    hmf_stats.insert_one(entry)

    print("counts by field and by level norm...")
    entry = {'_id': 'level_norm_by_field'}
    field_data = {}
    for f in fields:
        ff = f.replace(".", ":")  # mongo does not allow "." in key strings
        res = hmf_forms.find({'field_label': f})
        nforms = res.count()
        Ns = res.distinct('level_norm')
        min_norm = min(Ns)
        max_norm = max(Ns)
        field_data[ff] = {
            'nforms': nforms,
            'min_norm': min_norm,
            'max_norm': max_norm,
        }
        #print("{}: {}".format(f,field_data[ff]))
    hmf_stats.delete_one(entry)
    entry.update(field_data)
    hmf_stats.insert_one(entry)
def update_stats(verbose=True):
    """Refresh per-attribute statistics for the hecke_algebras collection.

    Delegates to ``update_attribute_stats`` (counts suppressed via
    ``nocounts=True``) for each tracked attribute in turn.
    """
    hecke_db = getDBConnection().hecke_algebras
    for attribute in ('level', 'weight'):
        update_attribute_stats(hecke_db, 'hecke_algebras', attribute,
                               nocounts=True)
def update_stats(verbose=True):
    """Refresh per-attribute statistics for the hecke_algebras collection.

    Runs ``update_attribute_stats`` over each tracked attribute with
    ``nocounts=True``.
    """
    connection = getDBConnection().hecke_algebras
    for field_name in ('level', 'weight'):
        update_attribute_stats(connection, 'hecke_algebras', field_name,
                               nocounts=True)
def update_stats(verbose=True):
    """Refresh per-attribute statistics for the Lattices 'lat' collection.

    Runs ``update_attribute_stats`` over each tracked attribute with
    ``nocounts=True``.
    """
    lattice_db = getDBConnection().Lattices
    for field_name in ('class_number', 'dim', 'det'):
        update_attribute_stats(lattice_db, 'lat', field_name, nocounts=True)