def get_deprecated_snl(snl_id, colls):
    """Load the SNL with ``snl_id``, strip its ICSD info, and tag it as updated.

    Deletes ``about._icsd`` from the stored record, appends a dated remark
    noting the deletion, and returns the result as an MPStructureNL.
    """
    record = colls.snl.find_one({'snl_id': snl_id})
    del record['about']['_icsd']
    stamp = datetime.datetime.now().strftime('%Y-%m-%d')
    record['about']['remarks'].append(
        'Record updated (about._icsd deleted) {}'.format(stamp))
    return MPStructureNL.from_dict(record)
def submit_all_snl(min=None, max=None):
    """Submit the canonical SNL of every eligible SNL group.

    Eligible groups are ordered, valid, have <= 200 sites, are not part of
    the CederDahn Challenge, contain only elements with POTCARs, are not
    already structure-optimized, and are not deprecated.  ``min``/``max``
    (inclusive) optionally restrict the snlgroup_id range; both must be
    given together.

    Note: ``min``/``max`` shadow the builtins — kept for interface
    compatibility with existing callers.
    """
    constraints = {
        'is_ordered': True,
        'is_valid': True,
        'nsites': {'$lte': 200},
        'canonical_snl.about.projects': {'$ne': 'CederDahn Challenge'},
        'elements': {'$nin': NO_POTCARS},
        'canonical_snl.about.history.name': {
            "$ne": "Materials Project structure optimization"},
        'canonical_snl.about.remarks': {"$ne": "DEPRECATED"},
    }
    # Compare against None explicitly: a legitimate bound of 0 is falsy and
    # was previously treated as "not given" (and then raised spuriously).
    if min is not None and max is not None:
        constraints['snlgroup_id'] = {'$gte': min, '$lte': max}
    elif min is not None or max is not None:
        raise ValueError('Must specify both min AND max if you specify one')
    snldb = SNLMongoAdapter.auto_load()
    sma = SubmissionMongoAdapter.auto_load()
    for result in snldb.snlgroups.find(constraints,
                                       {'canonical_snl': 1, 'snlgroup_id': 1}):
        snl = MPStructureNL.from_dict(result['canonical_snl'])
        parameters = {'snlgroup_id': result['snlgroup_id']}
        sma.submit_snl(snl, 'Anubhav Jain <*****@*****.**>',
                       parameters=parameters)
def add_snl(self, snl, force_new=False, snlgroup_guess=None):
    """Assign a new snl_id, attach symmetry metadata, and store the SNL.

    Oxidation states are stripped from a *copy* of the structure before
    symmetry analysis.  The DB lock is held for the whole operation and
    released on both success and failure.

    Returns (mpsnl, snlgroup_id, spec_group); raises ValueError on any
    failure (original exception is printed via traceback).
    """
    try:
        self.lock_db()
        snl_id = self._get_next_snl_id()
        spstruc = snl.structure.copy()
        spstruc.remove_oxidation_states()
        sf = SymmetryFinder(spstruc, SPACEGROUP_TOLERANCE)
        sf.get_spacegroup()
        # Query each symmetry property once (previously each was evaluated
        # twice) and fall back to a sentinel when the finder returns nothing.
        sgnum = sf.get_spacegroup_number() or -1
        sgsym = sf.get_spacegroup_symbol() or 'unknown'
        sghall = sf.get_hall() or 'unknown'
        sgxtal = sf.get_crystal_system() or 'unknown'
        sglatt = sf.get_lattice_type() or 'unknown'
        sgpoint = unicode(sf.get_point_group(), errors="ignore")
        mpsnl = MPStructureNL.from_snl(snl, snl_id, sgnum, sgsym, sghall,
                                       sgxtal, sglatt, sgpoint)
        snlgroup, add_new, spec_group = self.add_mpsnl(mpsnl, force_new,
                                                       snlgroup_guess)
        self.release_lock()
        return mpsnl, snlgroup.snlgroup_id, spec_group
    except:
        # Bare except kept on purpose: the lock must be released no matter
        # what went wrong.
        self.release_lock()
        traceback.print_exc()
        raise ValueError("Error while adding SNL!")
def process_item(self, item, index):
    """For each SNL group in ``item``, verify every member SNL still matches
    the group's canonical structure; record mismatches and load failures
    into per-category buckets and bump the shared counters."""
    nrow, ncol, snlgroups = super(SNLGroupMemberChecker, self).process_item(item, index)
    for snlgroup_id in item['snlgroup_ids']:
        # one bucket per category defined for this checker
        local_mismatch_dict = dict((k,[]) for k in categories[self.checker_name])
        snlgrp = snlgroups[snlgroup_id]
        mismatch_snls = []
        # entry prefix: "<snlgroup_id>,<canonical snl_id>:"
        entry = '%d,%d:' % (snlgrp.snlgroup_id, snlgrp.canonical_snl.snl_id)
        for idx,snl_id in enumerate(snlgrp.all_snl_ids):
            # the canonical SNL trivially matches itself
            if snl_id == snlgrp.canonical_snl.snl_id: continue
            try:
                mpsnl_dict = self._snls.collection.find_one({'snl_id': snl_id})
                mpsnl = MPStructureNL.from_dict(mpsnl_dict)
            except:
                # SNL failed to load/deserialize -> last ("other") category
                exc_type, exc_value, exc_traceback = sys.exc_info()
                _log.info('%r %r', exc_type, exc_value)
                local_mismatch_dict[categories[self.checker_name][-1]].append('%s%d' % (entry, snl_id))
                continue
            # structures that still match the canonical one are fine
            if self._matcher.fit(mpsnl.structure, snlgrp.canonical_structure): continue
            mismatch_snls.append(str(snl_id))
            _log.info('%s %d', entry, snl_id)
        if len(mismatch_snls) > 0:
            # first category collects "<prefix><comma-joined mismatching ids>"
            full_entry = '%s%s' % (entry, ','.join(mismatch_snls))
            local_mismatch_dict[categories[self.checker_name][0]].append(full_entry)
        _log.info('(%d) %r', self._counter_total.value, local_mismatch_dict)
        self._increase_counter(nrow, ncol, local_mismatch_dict)
def add_snl(self, snl, force_new=False, snlgroup_guess=None):
    """Assign a new snl_id, attach symmetry metadata, and store the SNL.

    Oxidation states are stripped from a *copy* of the structure before
    symmetry analysis.  The DB lock is held for the whole operation and
    released on both success and failure.

    Returns (mpsnl, snlgroup_id, spec_group); raises ValueError on any
    failure (original exception is printed via traceback).
    """
    try:
        self.lock_db()
        snl_id = self._get_next_snl_id()
        spstruc = snl.structure.copy()
        spstruc.remove_oxidation_states()
        sf = SpacegroupAnalyzer(spstruc, SPACEGROUP_TOLERANCE)
        sf.get_spacegroup()
        # Query each symmetry property once (previously each was evaluated
        # twice) and fall back to a sentinel when the analyzer returns nothing.
        sgnum = sf.get_spacegroup_number() or -1
        sgsym = sf.get_spacegroup_symbol() or 'unknown'
        sghall = sf.get_hall() or 'unknown'
        sgxtal = sf.get_crystal_system() or 'unknown'
        sglatt = sf.get_lattice_type() or 'unknown'
        sgpoint = sf.get_point_group()
        mpsnl = MPStructureNL.from_snl(snl, snl_id, sgnum, sgsym, sghall,
                                       sgxtal, sglatt, sgpoint)
        snlgroup, add_new, spec_group = self.add_mpsnl(
            mpsnl, force_new, snlgroup_guess)
        self.release_lock()
        return mpsnl, snlgroup.snlgroup_id, spec_group
    except:
        # Bare except kept on purpose: the lock must be released no matter
        # what went wrong.
        self.release_lock()
        traceback.print_exc()
        raise ValueError("Error while adding SNL!")
def add_snl(self, snl):
    """Assign a fresh snl_id, compute symmetry metadata, and insert the SNL.

    Returns the stored MPStructureNL together with its snlgroup_id.
    """
    new_id = self._get_next_snl_id()
    finder = SymmetryFinder(snl.structure, SPACEGROUP_TOLERANCE)
    finder.get_spacegroup()
    mpsnl = MPStructureNL.from_snl(
        snl, new_id,
        finder.get_spacegroup_number(),
        finder.get_spacegroup_symbol(),
        finder.get_hall(),
        finder.get_crystal_system(),
        finder.get_lattice_type())
    snlgroup, add_new = self.add_mpsnl(mpsnl)
    return mpsnl, snlgroup.snlgroup_id
def process_item(self, item, index):
    """Cross-compare SNL groups whose member SNLs share an ICSD id.

    For every ordered pair of groups in ``item['snlgroup_ids']``, find
    member SNLs carrying the same ``about._icsd.icsd_id`` and record
    whether their structures (and, when they match, the groups' canonical
    structures) fit each other.
    """
    nrow, ncol, snlgroups = super(SNLGroupIcsdChecker, self).process_item(item, index)
    for idx, primary_id in enumerate(item['snlgroup_ids'][:-1]):
        cat_key = ''
        local_mismatch_dict = dict((k, []) for k in categories[self.checker_name])
        primary_group = snlgroups[primary_id]
        # Materialize the query results: a pymongo cursor is single-pass,
        # so re-iterating a bare cursor inside the nested loops below would
        # yield nothing after the first pass and silently skip comparisons.
        primary_mpsnl_dicts = list(self._snls.collection.find(
            *self.get_snl_query(primary_group.all_snl_ids)))
        for secondary_id in item['snlgroup_ids'][idx+1:]:
            secondary_group = snlgroups[secondary_id]
            secondary_mpsnl_dicts = list(self._snls.collection.find(
                *self.get_snl_query(secondary_group.all_snl_ids)))
            for primary_mpsnl_dict in primary_mpsnl_dicts:
                primary_icsd_id = primary_mpsnl_dict['about']['_icsd']['icsd_id']
                for secondary_mpsnl_dict in secondary_mpsnl_dicts:
                    secondary_icsd_id = secondary_mpsnl_dict['about']['_icsd']['icsd_id']
                    if primary_icsd_id != secondary_icsd_id: continue
                    cat_key = 'same ICSDs'
                    primary_structure = MPStructureNL.from_dict(primary_mpsnl_dict).structure
                    secondary_structure = MPStructureNL.from_dict(secondary_mpsnl_dict).structure
                    match = self._matcher.fit(primary_structure, secondary_structure)
                    if match:
                        # also check both structures against their group
                        # canonicals, and the canonicals against each other
                        primary_match = self._matcher.fit(
                            primary_structure, primary_group.canonical_structure)
                        secondary_match = self._matcher.fit(
                            secondary_structure, secondary_group.canonical_structure)
                        canonical_match = self._matcher.fit(
                            primary_group.canonical_structure,
                            secondary_group.canonical_structure)
                    local_mismatch_dict[cat_key].append(
                        '({}, {}): ({}, {}) -> {} ({}{})'.format(
                            primary_id, secondary_id,
                            primary_mpsnl_dict['snl_id'],
                            secondary_mpsnl_dict['snl_id'],
                            primary_icsd_id, match,
                            '/{}/{}/{}'.format(
                                primary_match, secondary_match, canonical_match
                            ) if match else ''
                        )
                    )
        if cat_key:
            _log.info('(%d) %r', self._counter_total.value, local_mismatch_dict)
        self._increase_counter(nrow, ncol, local_mismatch_dict)
def submit_new_workflow(self):
    # finds a submitted job, creates a workflow, and submits it to FireWorks
    """Claim one SUBMITTED job, validate it, and either reject it with a
    reason or build and enqueue a FireWorks workflow for it.

    Returns the claimed job's submission_id (implicitly None when no
    SUBMITTED job exists).
    """
    # Atomically claim a job so concurrent processors can't double-submit.
    job = self.jobs.find_and_modify({'state': 'SUBMITTED'}, {'$set': {
        'state': 'WAITING'
    }})
    if job:
        submission_id = job['submission_id']
        try:
            # presence of 'snl_id' marks an already-registered MP SNL
            if 'snl_id' in job:
                snl = MPStructureNL.from_dict(job)
            else:
                snl = StructureNL.from_dict(job)
            if len(snl.structure.sites) > SubmissionProcessor.MAX_SITES:
                self.sma.update_state(submission_id, 'REJECTED', 'too many sites', {})
                print 'REJECTED WORKFLOW FOR {} - too many sites ({})'.format(
                    snl.structure.formula, len(snl.structure.sites))
            elif not job['is_valid']:
                self.sma.update_state(
                    submission_id, 'REJECTED',
                    'invalid structure (atoms too close)', {})
                print 'REJECTED WORKFLOW FOR {} - invalid structure'.format(
                    snl.structure.formula)
            elif len(set(NO_POTCARS) & set(job['elements'])) > 0:
                self.sma.update_state(submission_id, 'REJECTED',
                                      'invalid structure (no POTCAR)', {})
                print 'REJECTED WORKFLOW FOR {} - invalid element (No POTCAR)'.format(
                    snl.structure.formula)
            elif not job['is_ordered']:
                self.sma.update_state(submission_id, 'REJECTED',
                                      'invalid structure (disordered)', {})
                print 'REJECTED WORKFLOW FOR {} - invalid structure'.format(
                    snl.structure.formula)
            else:
                # stamp the submission id on the SNL before building the workflow
                snl.data['_materialsproject'] = snl.data.get(
                    '_materialsproject', {})
                snl.data['_materialsproject'][
                    'submission_id'] = submission_id
                # create a workflow
                if "Elasticity" in snl.projects:
                    wf = snl_to_wf_elastic(snl, job['parameters'])
                else:
                    wf = snl_to_wf(snl, job['parameters'])
                self.launchpad.add_wf(wf)
                print 'ADDED WORKFLOW FOR {}'.format(snl.structure.formula)
        except:
            # any failure flips the job to ERROR so it isn't silently retried
            self.jobs.find_and_modify({'submission_id': submission_id},
                                      {'$set': {
                                          'state': 'ERROR'
                                      }})
            traceback.print_exc()
        return submission_id
def submit_all_snl(snldb, sma, snlgroup_constraint=None):
    """Submit the canonical SNL of every eligible group in ``snldb`` via ``sma``.

    Skips disordered, invalid, >200-site, CederDahn-Challenge,
    already-structure-optimized, deprecated, and no-POTCAR-element groups.
    ``snlgroup_constraint`` (a mongo expression) optionally narrows the
    snlgroup_id selection.
    """
    query = {
        'is_ordered': True,
        'is_valid': True,
        'nsites': {'$lte': 200},
        'canonical_snl.about.projects': {'$ne': 'CederDahn Challenge'},
        'elements': {'$nin': NO_POTCARS},
        'canonical_snl.about.history.name':
            {"$ne": "Materials Project structure optimization"},
        'canonical_snl.about.remarks': {"$ne": "DEPRECATED"},
    }
    if snlgroup_constraint:
        query['snlgroup_id'] = snlgroup_constraint
    fields = {'canonical_snl': 1, 'snlgroup_id': 1}
    for doc in snldb.snlgroups.find(query, fields):
        canonical = MPStructureNL.from_dict(doc['canonical_snl'])
        sma.submit_snl(canonical, 'Anubhav Jain <*****@*****.**>',
                       parameters={'snlgroup_id': doc['snlgroup_id']})
def submit_new_workflow(self):
    # finds a submitted job, creates a workflow, and submits it to FireWorks
    """Claim one SUBMITTED job, validate it, and either reject it with a
    reason or build and enqueue a FireWorks workflow for it.

    Returns the claimed job's submission_id (implicitly None when no
    SUBMITTED job exists).
    """
    # Atomically claim a job so concurrent processors can't double-submit.
    job = self.jobs.find_and_modify({'state': 'SUBMITTED'},
                                    {'$set': {'state': 'WAITING'}})
    if job:
        submission_id = job['submission_id']
        try:
            # presence of 'snl_id' marks an already-registered MP SNL
            if 'snl_id' in job:
                snl = MPStructureNL.from_dict(job)
            else:
                snl = StructureNL.from_dict(job)
            if len(snl.structure.sites) > SubmissionProcessor.MAX_SITES:
                self.sma.update_state(submission_id, 'REJECTED', 'too many sites', {})
                print 'REJECTED WORKFLOW FOR {} - too many sites ({})'.format(
                    snl.structure.formula, len(snl.structure.sites))
            elif not job['is_valid']:
                self.sma.update_state(submission_id, 'REJECTED',
                                      'invalid structure (atoms too close)', {})
                print 'REJECTED WORKFLOW FOR {} - invalid structure'.format(
                    snl.structure.formula)
            elif len(set(NO_POTCARS) & set(job['elements'])) > 0:
                self.sma.update_state(submission_id, 'REJECTED',
                                      'invalid structure (no POTCAR)', {})
                print 'REJECTED WORKFLOW FOR {} - invalid element (No POTCAR)'.format(
                    snl.structure.formula)
            elif not job['is_ordered']:
                self.sma.update_state(submission_id, 'REJECTED',
                                      'invalid structure (disordered)', {})
                print 'REJECTED WORKFLOW FOR {} - invalid structure'.format(
                    snl.structure.formula)
            else:
                # stamp the submission id on the SNL before building the workflow
                snl.data['_materialsproject'] = snl.data.get('_materialsproject', {})
                snl.data['_materialsproject']['submission_id'] = submission_id
                # create a workflow
                # NOTE(review): the "Elasticity" project routes to the
                # *phonon* workflow here — confirm this is intended.
                if "Elasticity" in snl.projects:
                    from mpworks.workflows.snl_to_wf_phonon import snl_to_wf_phonon
                    wf=snl_to_wf_phonon(snl, job['parameters'])
                else:
                    wf = snl_to_wf(snl, job['parameters'])
                self.launchpad.add_wf(wf)
                print 'ADDED WORKFLOW FOR {}'.format(snl.structure.formula)
        except:
            # any failure flips the job to ERROR so it isn't silently retried
            self.jobs.find_and_modify({'submission_id': submission_id},
                                      {'$set': {'state': 'ERROR'}})
            traceback.print_exc()
        return submission_id
def add_snl(self, snl, force_new=False, snlgroup_guess=None):
    """Assign a new snl_id, attach symmetry metadata, and store the SNL.

    Returns (mpsnl, snlgroup_id).
    """
    snl_id = self._get_next_snl_id()
    sf = SymmetryFinder(snl.structure, SPACEGROUP_TOLERANCE)
    sf.get_spacegroup()
    # Query each symmetry property once (previously each was evaluated
    # twice) and fall back to a sentinel when the finder returns nothing.
    sgnum = sf.get_spacegroup_number() or -1
    sgsym = sf.get_spacegroup_symbol() or 'unknown'
    sghall = sf.get_hall() or 'unknown'
    sgxtal = sf.get_crystal_system() or 'unknown'
    sglatt = sf.get_lattice_type() or 'unknown'
    sgpoint = unicode(sf.get_point_group(), errors="ignore")
    mpsnl = MPStructureNL.from_snl(snl, snl_id, sgnum, sgsym, sghall,
                                   sgxtal, sglatt, sgpoint)
    snlgroup, add_new = self.add_mpsnl(mpsnl, force_new, snlgroup_guess)
    return mpsnl, snlgroup.snlgroup_id
def submit_all_snl(min=None, max=None):
    """Submit the canonical SNL of every eligible SNL group.

    Eligible groups are ordered, valid, have <= 200 sites, are not part of
    the CederDahn Challenge, contain only elements with POTCARs, are not
    already structure-optimized, and are not deprecated.  ``min``/``max``
    (inclusive) optionally restrict the snlgroup_id range; both must be
    given together.

    Note: ``min``/``max`` shadow the builtins — kept for interface
    compatibility with existing callers.
    """
    constraints = {
        'is_ordered': True,
        'is_valid': True,
        'nsites': {'$lte': 200},
        'canonical_snl.about.projects': {'$ne': 'CederDahn Challenge'},
        'elements': {'$nin': NO_POTCARS},
        'canonical_snl.about.history.name': {
            "$ne": "Materials Project structure optimization"},
        'canonical_snl.about.remarks': {"$ne": "DEPRECATED"},
    }
    # Compare against None explicitly: a legitimate bound of 0 is falsy and
    # was previously treated as "not given" (and then raised spuriously).
    if min is not None and max is not None:
        constraints['snlgroup_id'] = {'$gte': min, '$lte': max}
    elif min is not None or max is not None:
        raise ValueError('Must specify both min AND max if you specify one')
    snldb = SNLMongoAdapter.auto_load()
    sma = SubmissionMongoAdapter.auto_load()
    for result in snldb.snlgroups.find(constraints,
                                       {'canonical_snl': 1, 'snlgroup_id': 1}):
        snl = MPStructureNL.from_dict(result['canonical_snl'])
        parameters = {'snlgroup_id': result['snlgroup_id']}
        sma.submit_snl(snl, 'Anubhav Jain <*****@*****.**>',
                       parameters=parameters)
def find_alternate_canonical(): # see if we can replace a deprecated canonical SNL with a non-deprecated one module_dir = os.path.dirname(os.path.abspath(__file__)) snl_f = os.path.join(module_dir, 'snl.yaml') snldb = SNLMongoAdapter.from_file(snl_f) snl = snldb.snl snlgroups = snldb.snlgroups for g in snlgroups.find({"canonical_snl.about.remarks":"DEPRECATED"}, {"snlgroup_id": 1, "all_snl_ids": 1}): for s in snl.find({"snl_id": {"$in": g['all_snl_ids']}, "about.remarks": {"$ne": "DEPRECATED"}}): canonical_mpsnl = MPStructureNL.from_dict(s) snldb.switch_canonical_snl(g['snlgroup_id'], canonical_mpsnl) print g['snlgroup_id'] break print 'DONE'
def process_item(self, item, index):
    """Re-derive the spacegroup of one SNL (``item`` is an snl_id) and record
    a category when it disagrees with the stored sg_num or fails to load."""
    nrow, ncol, snlgroups = super(SNLSpaceGroupChecker, self).process_item(item, index)
    local_mismatch_dict = dict((k,[]) for k in categories[self.checker_name])
    category = ''
    try:
        mpsnl_dict = self._snls.collection.find_one({ 'snl_id': item })
        mpsnl = MPStructureNL.from_dict(mpsnl_dict)
        mpsnl.structure.remove_oxidation_states()
        sf = SpacegroupAnalyzer(mpsnl.structure, symprec=0.1)
        if sf.get_spacegroup_number() != mpsnl.sg_num:
            # index 1 when the analyzer fell back to 0, else index 0
            category = categories[self.checker_name][int(sf.get_spacegroup_number() == 0)]
    except:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        # NOTE(review): keyed with the literal 0 rather than
        # self.checker_name — confirm `categories` really has key 0 here.
        category = categories[0][2]
    if category:
        local_mismatch_dict[category].append(str(item))
        _log.info('(%d) %r', self._counter_total.value, local_mismatch_dict)
    self._increase_counter(nrow, ncol, local_mismatch_dict)
def check_snl_spacegroups(args):
    """check spacegroups of all available SNLs"""
    # One plotly stream pair per id range: index 0 = scatter (bad),
    # index 1 = bar (good); `is_good` (a bool) indexes into `s` below.
    range_index = args.start / num_ids_per_stream  # Py2 integer division
    idxs = [range_index * 2]
    idxs += [idxs[0] + 1]
    s = [py.Stream(stream_ids[i]) for i in idxs]
    for i in range(len(idxs)):
        s[i].open()
    end = num_snls if args.end > num_snls else args.end
    id_range = {"$gt": args.start, "$lte": end}
    mpsnl_cursor = sma.snl.find({"snl_id": id_range})
    num_good_ids = 0
    colors = []  # accumulated marker colors for the scatter trace
    for mpsnl_dict in mpsnl_cursor:
        start_time = time.clock()
        exc_raised = False
        try:
            mpsnl = MPStructureNL.from_dict(mpsnl_dict)
            sf = SpacegroupAnalyzer(mpsnl.structure, symprec=0.1)
        except:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            exc_raised = True
        # good = loaded cleanly AND recomputed spacegroup equals stored one
        is_good = (not exc_raised and sf.get_spacegroup_number() == mpsnl.sg_num)
        if is_good:  # Bar (good)
            num_good_ids += 1
            data = dict(x=[num_good_ids], y=[range_index])
        else:  # Scatter (bad)
            if exc_raised:
                # category 2: pybtex (reference parsing) errors; 3: other
                category = 2 if fnmatch(str(exc_type), '*pybtex*') else 3
                text = ' '.join([str(exc_type), str(exc_value)])
            else:
                # category 1: analyzer fell back to 0; 0: genuine SG change
                category = int(sf.get_spacegroup_number() == 0)
                text = '%s: %d' % (mpsnl.snlgroup_key, sf.get_spacegroup_number())
            colors.append(category_colors[category])
            data = dict(x=mpsnl_dict['snl_id'] % num_ids_per_stream,
                        y=range_index, text=text, marker=Marker(color=colors))
        s[is_good].write(data)
    for i in range(len(idxs)):
        s[i].close()
def resubmit(self, submission_id, snl_db=None):
    """Reset a submission back to SUBMITTED, re-attaching its SNL and
    snlgroup_id to the job parameters when an SNL was already created."""
    if not snl_db:
        snl_db = SNLMongoAdapter.auto_load()
    mpsnl, snlgroup_id = None, None
    snl_dict = snl_db.snl.find_one(
        {"about._materialsproject.submission_id": submission_id})
    if snl_dict:
        mpsnl = MPStructureNL.from_dict(snl_dict)
        group_doc = snl_db.snlgroups.find_one(
            {"all_snl_ids": snl_dict['snl_id']}, {"snlgroup_id": 1})
        snlgroup_id = group_doc['snlgroup_id']
    # reset the submission state; keep the existing parameters
    updates = {'state': 'SUBMITTED', 'state_details': {}, 'task_dict': {}}
    if mpsnl:
        job_doc = self.jobs.find_one({'submission_id': submission_id},
                                     {'parameters': 1})
        updates['parameters'] = job_doc['parameters']
        updates['parameters'].update({"mpsnl": mpsnl.as_dict(),
                                      "snlgroup_id": snlgroup_id})
    self.jobs.find_and_modify({'submission_id': submission_id},
                              {'$set': updates})
def resubmit(self, submission_id, snl_db=None):
    """Reset a submission back to SUBMITTED, re-attaching its SNL and
    snlgroup_id to the job parameters when an SNL was already created."""
    if not snl_db:
        snl_db = SNLMongoAdapter.auto_load()
    mpsnl, snlgroup_id = None, None
    snl_dict = snl_db.snl.find_one(
        {"about._materialsproject.submission_id": submission_id})
    if snl_dict:
        mpsnl = MPStructureNL.from_dict(snl_dict)
        group_doc = snl_db.snlgroups.find_one(
            {"all_snl_ids": snl_dict['snl_id']}, {"snlgroup_id": 1})
        snlgroup_id = group_doc['snlgroup_id']
    # reset the submission state; keep the existing parameters
    updates = {'state': 'SUBMITTED', 'state_details': {}, 'task_dict': {}}
    if mpsnl:
        job_doc = self.jobs.find_one({'submission_id': submission_id},
                                     {'parameters': 1})
        updates['parameters'] = job_doc['parameters']
        updates['parameters'].update({"mpsnl": mpsnl.to_dict,
                                      "snlgroup_id": snlgroup_id})
    self.jobs.find_and_modify({'submission_id': submission_id},
                              {'$set': updates})
def check_snl_spacegroups(args):
    """check spacegroups of all available SNLs"""
    # One plotly stream pair per id range: index 0 = scatter (bad),
    # index 1 = bar (good); `is_good` (a bool) indexes into `s` below.
    range_index = args.start / num_ids_per_stream  # Py2 integer division
    idxs = [range_index*2]
    idxs += [idxs[0]+1]
    s = [py.Stream(stream_ids[i]) for i in idxs]
    for i in range(len(idxs)):
        s[i].open()
    end = num_snls if args.end > num_snls else args.end
    id_range = {"$gt": args.start, "$lte": end}
    mpsnl_cursor = sma.snl.find({ "snl_id": id_range})
    num_good_ids = 0
    colors=[]  # accumulated marker colors for the scatter trace
    for mpsnl_dict in mpsnl_cursor:
        start_time = time.clock()
        exc_raised = False
        try:
            mpsnl = MPStructureNL.from_dict(mpsnl_dict)
            sf = SpacegroupAnalyzer(mpsnl.structure, symprec=0.1)
        except:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            exc_raised = True
        # good = loaded cleanly AND recomputed spacegroup equals stored one
        is_good = (not exc_raised and sf.get_spacegroup_number() == mpsnl.sg_num)
        if is_good: # Bar (good)
            num_good_ids += 1
            data = dict(x=[num_good_ids], y=[range_index])
        else: # Scatter (bad)
            if exc_raised:
                # category 2: pybtex (reference parsing) errors; 3: other
                category = 2 if fnmatch(str(exc_type), '*pybtex*') else 3
                text = ' '.join([str(exc_type), str(exc_value)])
            else:
                # category 1: analyzer fell back to 0; 0: genuine SG change
                category = int(sf.get_spacegroup_number() == 0)
                text = '%s: %d' % (mpsnl.snlgroup_key, sf.get_spacegroup_number())
            colors.append(category_colors[category])
            data = dict(
                x=mpsnl_dict['snl_id']%num_ids_per_stream,
                y=range_index, text=text, marker=Marker(color=colors)
            )
        s[is_good].write(data)
    for i in range(len(idxs)):
        s[i].close()
def get_deprecated_snl(snl_id, colls):
    """Fetch SNL ``snl_id``, drop its ``about._icsd`` entry, note the change
    in the remarks, and return it as an MPStructureNL."""
    doc = colls.snl.find_one({'snl_id': snl_id})
    del doc['about']['_icsd']
    today = datetime.datetime.now().strftime('%Y-%m-%d')
    doc['about']['remarks'].append(
        'Record updated (about._icsd deleted) {}'.format(today))
    return MPStructureNL.from_dict(doc)
def check_snls_in_snlgroups(args):
    """check whether SNLs in each SNLGroup still match resp. canonical SNL"""
    # Stream pair for this id range: s[0] = scatter (bad), s[1] = bar (good).
    range_index = args.start / num_ids_per_stream  # Py2 integer division
    idxs = [2*(num_snl_streams+range_index)]
    idxs += [idxs[0]+1]
    s = [py.Stream(stream_ids[i]) for i in idxs]
    for i in range(len(idxs)):
        s[i].open()
    end = num_snlgroups if args.end > num_snlgroups else args.end
    id_range = {"$gt": args.start, "$lte": end}
    snlgrp_cursor = sma.snlgroups.find({ "snlgroup_id": id_range})
    colors = []  # accumulated marker colors for the scatter trace
    num_good_ids = 0
    for snlgrp_dict in snlgrp_cursor:
        # NOTE(review): time.clock() is later passed to sleep() — this looks
        # like rate limiting for the plotly stream; confirm intent.
        start_time = time.clock()
        try:
            snlgrp = SNLGroup.from_dict(snlgrp_dict)
        except:
            # group itself failed to deserialize -> "Others" category
            exc_type, exc_value, exc_traceback = sys.exc_info()
            text = ' '.join([str(exc_type), str(exc_value)])
            colors.append(category_colors[-1]) # Others
            data = dict(
                x=snlgrp_dict['snlgroup_id']%num_ids_per_stream,
                y=range_index, text=text, marker=Marker(color=colors)
            )
            s[0].write(data)
            sleep(start_time)
            continue
        # single-member groups trivially match their canonical SNL
        if len(snlgrp.all_snl_ids) <= 1:
            num_good_ids += 1
            data = dict(x=[num_good_ids], y=[range_index])
            s[1].write(data)
            sleep(start_time)
            continue
        exc_raised = False
        all_snls_good = True
        for snl_id in snlgrp.all_snl_ids:
            if snl_id == snlgrp.canonical_snl.snl_id: continue
            mpsnl_dict = sma.snl.find_one({ "snl_id": snl_id })
            try:
                mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                is_match = matcher.fit(mpsnl.structure, snlgrp.canonical_structure)
            except:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                exc_raised = True
            if exc_raised or not is_match: # Scatter (bad)
                if exc_raised:
                    # category 2: pybtex (reference parsing) errors; 3: other
                    category = 2 if fnmatch(str(exc_type), '*pybtex*') else 3
                    text = ' '.join([str(exc_type), str(exc_value)])
                else:
                    category = 0
                    text = '%d != can:%d' % (mpsnl_dict['snl_id'],
                                             snlgrp.canonical_snl.snl_id)
                colors.append(category_colors[category])
                data = dict(
                    x=snlgrp_dict['snlgroup_id']%num_ids_per_stream,
                    y=range_index, text=text, marker=Marker(color=colors)
                )
                s[0].write(data)
                all_snls_good = False
                sleep(start_time)
                # first bad member settles the whole group
                break
        if all_snls_good: # Bar (good)
            num_good_ids += 1
            data = dict(x=[num_good_ids], y=[range_index])
            s[1].write(data)
            sleep(start_time)
    for i in range(len(idxs)):
        s[i].close()
def analyze(args):
    """analyze data at any point for a copy of the streaming figure"""
    # NOTE: make copy online first with suffix _%Y-%m-%d and note figure id
    fig = py.get_figure(creds['username'], args.fig_id)
    if args.t:
        if args.fig_id == 42:
            # figure 42: pairs of snlgroup_ids flagged as possible duplicates
            label_entries = filter(None, '<br>'.join(fig['data'][2]['text']).split('<br>'))
            pairs = map(make_tuple, label_entries)
            grps = set(chain.from_iterable(pairs))
            # map snlgroup_id -> canonical snlgroup_key (excl. CederDahn)
            snlgrp_cursor = sma.snlgroups.aggregate([
                { '$match': {
                    'snlgroup_id': { '$in': list(grps) },
                    'canonical_snl.about.projects': {'$ne': 'CederDahn Challenge'}
                } },
                { '$project': {
                    'snlgroup_id': 1, 'canonical_snl.snlgroup_key': 1, '_id': 0
                } }
            ], cursor={})
            snlgroup_keys = {}
            for d in snlgrp_cursor:
                snlgroup_keys[d['snlgroup_id']] = d['canonical_snl']['snlgroup_key']
            print snlgroup_keys[40890]
            sma2 = SNLMongoAdapter.from_file(
                os.path.join(os.environ['DB_LOC'], 'materials_db.yaml')
            )
            # pull energies / band gaps / volumes from the materials DB
            materials_cursor = sma2.database.materials.aggregate([
                { '$match': {
                    'snlgroup_id_final': { '$in': list(grps) },
                    'snl_final.about.projects': {'$ne': 'CederDahn Challenge'}
                } },
                { '$project': {
                    'snlgroup_id_final': 1, '_id': 0, 'task_id': 1,
                    'final_energy_per_atom': 1,
                    'band_gap.search_gap.band_gap': 1, 'volume': 1, 'nsites': 1
                }}
            ], cursor={})
            snlgroup_data = {}
            for material in materials_cursor:
                snlgroup_id = material['snlgroup_id_final']
                final_energy_per_atom = material['final_energy_per_atom']
                band_gap = material['band_gap']['search_gap']['band_gap']
                volume_per_atom = material['volume'] / material['nsites']
                snlgroup_data[snlgroup_id] = {
                    'final_energy_per_atom': final_energy_per_atom,
                    'band_gap': band_gap, 'task_id': material['task_id'],
                    'volume_per_atom': volume_per_atom
                }
            print snlgroup_data[40890]
            # split output by whether both groups are present in materials DB
            filestem = 'mpworks/check_snl/results/bad_snlgroups_2_'
            with open(filestem+'in_matdb.csv', 'wb') as f, \
                    open(filestem+'notin_matdb.csv', 'wb') as g:
                writer1, writer2 = csv.writer(f), csv.writer(g)
                header = [
                    'category', 'composition',
                    'snlgroup_id 1', 'sg_num 1', 'task_id 1',
                    'snlgroup_id 2', 'sg_num 2', 'task_id 2',
                    'delta_energy', 'delta_bandgap', 'delta_volume_per_atom',
                    'rms_dist', 'scenario'
                ]
                writer1.writerow(header)
                writer2.writerow(header)
                for primary_id, secondary_id in pairs:
                    if primary_id not in snlgroup_keys or \
                            secondary_id not in snlgroup_keys:
                        continue
                    # snlgroup_key is "<composition>--<sg_num>"
                    composition, primary_sg_num = snlgroup_keys[primary_id].split('--')
                    secondary_sg_num = snlgroup_keys[secondary_id].split('--')[1]
                    category = 'same SGs' if primary_sg_num == secondary_sg_num else 'diff. SGs'
                    if primary_id not in snlgroup_data or secondary_id not in snlgroup_data:
                        delta_energy, delta_bandgap, delta_volume_per_atom = '', '', ''
                    else:
                        delta_energy = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['final_energy_per_atom'] - \
                            snlgroup_data[secondary_id]['final_energy_per_atom']
                        ))
                        delta_bandgap = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['band_gap'] - \
                            snlgroup_data[secondary_id]['band_gap']
                        ))
                        delta_volume_per_atom = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['volume_per_atom'] - \
                            snlgroup_data[secondary_id]['volume_per_atom']
                        ))
                    scenario, rms_dist_str = '', ''
                    # only structurally compare when SGs differ but data exists
                    if category == 'diff. SGs' and delta_energy and delta_bandgap:
                        scenario = 'different' if (
                            float(delta_energy) > 0.01 or float(delta_bandgap) > 0.1
                        ) else 'similar'
                        snlgrp1_dict = sma.snlgroups.find_one({ "snlgroup_id": primary_id })
                        snlgrp2_dict = sma.snlgroups.find_one({ "snlgroup_id": secondary_id })
                        snlgrp1 = SNLGroup.from_dict(snlgrp1_dict)
                        snlgrp2 = SNLGroup.from_dict(snlgrp2_dict)
                        primary_structure = snlgrp1.canonical_structure
                        secondary_structure = snlgrp2.canonical_structure
                        rms_dist = matcher.get_rms_dist(primary_structure, secondary_structure)
                        if rms_dist is not None:
                            rms_dist_str = "({0:.3g},{1:.3g})".format(*rms_dist)
                            print rms_dist_str
                    row = [
                        category, composition,
                        primary_id, primary_sg_num,
                        snlgroup_data[primary_id]['task_id'] \
                        if primary_id in snlgroup_data else '',
                        secondary_id, secondary_sg_num,
                        snlgroup_data[secondary_id]['task_id'] \
                        if secondary_id in snlgroup_data else '',
                        delta_energy, delta_bandgap, delta_volume_per_atom,
                        rms_dist_str, scenario
                    ]
                    if delta_energy and delta_bandgap:
                        writer1.writerow(row)
                    else:
                        writer2.writerow(row)
        elif args.fig_id == 16:
            # figure 16: spacegroup-change analysis -> old vs. new SG scatter
            out_fig = Figure()
            badsnls_trace = Scatter(x=[], y=[], text=[], mode='markers', name='SG Changes')
            bisectrix = Scatter(x=[0,230], y=[0,230], mode='lines', name='bisectrix')
            print 'pulling bad snls from plotly ...'
            bad_snls = OrderedDict()  # snl_id -> category
            for category, text in zip(fig['data'][2]['y'], fig['data'][2]['text']):
                for snl_id in map(int, text.split('<br>')):
                    bad_snls[snl_id] = category
            with open('mpworks/check_snl/results/bad_snls.csv', 'wb') as f:
                print 'pulling bad snls from database ...'
                mpsnl_cursor = sma.snl.find({
                    'snl_id': { '$in': bad_snls.keys() },
                    'about.projects': {'$ne': 'CederDahn Challenge'}
                })
                writer = csv.writer(f)
                writer.writerow([
                    'snl_id', 'category', 'snlgroup_key', 'nsites',
                    'remarks', 'projects', 'authors'
                ])
                print 'writing bad snls to file ...'
                for mpsnl_dict in mpsnl_cursor:
                    mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                    row = [ mpsnl.snl_id, bad_snls[mpsnl.snl_id], mpsnl.snlgroup_key ]
                    row += _get_snl_extra_info(mpsnl)
                    writer.writerow(row)
                    sg_num = mpsnl.snlgroup_key.split('--')[1]
                    # re-analyze SNLs whose SG changed or defaulted
                    if (bad_snls[mpsnl.snl_id] == 'SG default' and sg_num != '-1') or \
                            bad_snls[mpsnl.snl_id] == 'SG change':
                        mpsnl.structure.remove_oxidation_states()
                        sf = SpacegroupAnalyzer(mpsnl.structure, symprec=0.1)
                        badsnls_trace['x'].append(mpsnl.sg_num)
                        badsnls_trace['y'].append(sf.get_spacegroup_number())
                        badsnls_trace['text'].append(mpsnl.snl_id)
                        if bad_snls[mpsnl.snl_id] == 'SG default':
                            print sg_num, sf.get_spacegroup_number()
            print 'plotting out-fig ...'
            out_fig['data'] = Data([bisectrix, badsnls_trace])
            out_fig['layout'] = Layout(
                showlegend=False, hovermode='closest',
                title='Spacegroup Assignment Changes',
                xaxis=XAxis(showgrid=False, title='old SG number', range=[0,230]),
                yaxis=YAxis(showgrid=False, title='new SG number', range=[0,230]),
            )
            filename = 'spacegroup_changes_'
            filename += datetime.datetime.now().strftime('%Y-%m-%d')
            py.plot(out_fig, filename=filename, auto_open=False)
        elif args.fig_id == 43: # SNLGroupMemberChecker
            # re-check flagged group members with a looser StructureMatcher
            matcher2 = StructureMatcher(
                ltol=0.2, stol=0.3, angle_tol=5, primitive_cell=False, scale=True,
                attempt_supercell=True, comparator=ElementComparator()
            )
            print 'pulling data from plotly ...'
            trace = Scatter(x=[], y=[], text=[], mode='markers', name='mismatches')
            bad_snls = OrderedDict() # snlgroup_id : [ mismatching snl_ids ]
            for category, text in zip(fig['data'][2]['y'], fig['data'][2]['text']):
                if category != 'mismatch': continue
                for entry in text.split('<br>'):
                    # entry format: "<snlgroup_id>,<canonical>:<id1>,<id2>,..."
                    fields = entry.split(':')
                    snlgroup_id = int(fields[0].split(',')[0])
                    print snlgroup_id
                    snlgrp_dict = sma.snlgroups.find_one({ 'snlgroup_id': snlgroup_id })
                    snlgrp = SNLGroup.from_dict(snlgrp_dict)
                    s1 = snlgrp.canonical_structure.get_primitive_structure()
                    bad_snls[snlgroup_id] = []
                    for i, snl_id in enumerate(fields[1].split(',')):
                        mpsnl_dict = sma.snl.find_one({ 'snl_id': int(snl_id) })
                        if 'CederDahn Challenge' in mpsnl_dict['about']['projects']:
                            print 'skip CederDahn: %s' % snl_id
                            continue
                        mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                        s2 = mpsnl.structure.get_primitive_structure()
                        is_match = matcher2.fit(s1, s2)
                        if is_match: continue
                        bad_snls[snlgroup_id].append(snl_id)
                        trace['x'].append(snlgroup_id)
                        trace['y'].append(i+1)
                        trace['text'].append(snl_id)
                    # drop groups where the looser matcher resolved everything
                    if len(bad_snls[snlgroup_id]) < 1:
                        bad_snls.pop(snlgroup_id, None)
            with open('mpworks/check_snl/results/bad_snlgroups.csv', 'wb') as f:
                print 'pulling bad snlgroups from database ...'
                snlgroup_cursor = sma.snlgroups.find({
                    'snlgroup_id': { '$in': bad_snls.keys() },
                })
                writer = csv.writer(f)
                writer.writerow(['snlgroup_id', 'snlgroup_key', 'mismatching snl_ids'])
                print 'writing bad snlgroups to file ...'
                for snlgroup_dict in snlgroup_cursor:
                    snlgroup = SNLGroup.from_dict(snlgroup_dict)
                    row = [
                        snlgroup.snlgroup_id, snlgroup.canonical_snl.snlgroup_key,
                        ' '.join(bad_snls[snlgroup.snlgroup_id])
                    ]
                    writer.writerow(row)
            print 'plotting out-fig ...'
            out_fig = Figure()
            out_fig['data'] = Data([trace])
            out_fig['layout'] = Layout(
                showlegend=False, hovermode='closest',
                title='Member Mismatches of SNLGroup Canonicals',
                xaxis=XAxis(showgrid=False, title='snlgroup_id', showexponent='none'),
                yaxis=YAxis(showgrid=False, title='# mismatching SNLs'),
            )
            filename = 'groupmember_mismatches_'
            filename += datetime.datetime.now().strftime('%Y-%m-%d')
            py.plot(out_fig, filename=filename, auto_open=False)
    else:
        # default analysis: walk all stream traces, tally error categories,
        # and dump bad SNLs / SNL groups to CSV files
        errors = Counter()
        bad_snls = OrderedDict()       # snl_id -> category
        bad_snlgroups = OrderedDict()  # snlgroup_id -> first mismatching snl_id
        for i,d in enumerate(fig['data']):
            if not isinstance(d, Scatter): continue
            if not 'x' in d or not 'y' in d or not 'text' in d: continue
            # trace name encodes the id range start (in thousands)
            start_id = int(d['name'].split(' - ')[0][:-1])*1000
            marker_colors = d['marker']['color']
            if i < 2*num_snl_streams: # spacegroups
                errors += Counter(marker_colors)
                for idx,color in enumerate(marker_colors):
                    snl_id = start_id + d['x'][idx]
                    color_index = category_colors.index(color)
                    category = categories[color_index]
                    bad_snls[snl_id] = category
            else: # groupmembers
                for idx,color in enumerate(marker_colors):
                    if color != category_colors[0]: continue
                    snlgroup_id = start_id + d['x'][idx]
                    # text format: "<mismatch_snl_id> != can:<canonical_snl_id>"
                    mismatch_snl_id, canonical_snl_id = d['text'][idx].split(' != ')
                    bad_snlgroups[snlgroup_id] = int(mismatch_snl_id)
        print errors
        # update the summary trace with per-category error counts
        fig_data = fig['data'][-1]
        fig_data['x'] = [ errors[color] for color in fig_data['marker']['color'] ]
        filename = _get_filename()
        print filename
        #py.plot(fig, filename=filename)
        with open('mpworks/check_snl/results/bad_snls.csv', 'wb') as f:
            mpsnl_cursor = sma.snl.find({ 'snl_id': { '$in': bad_snls.keys() } })
            writer = csv.writer(f)
            writer.writerow([
                'snl_id', 'category', 'snlgroup_key', 'nsites',
                'remarks', 'projects', 'authors'
            ])
            for mpsnl_dict in mpsnl_cursor:
                mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                row = [ mpsnl.snl_id, bad_snls[mpsnl.snl_id], mpsnl.snlgroup_key ]
                row += _get_snl_extra_info(mpsnl)
                writer.writerow(row)
        with open('mpworks/check_snl/results/bad_snlgroups.csv', 'wb') as f:
            snlgrp_cursor = sma.snlgroups.find({ 'snlgroup_id': { '$in': bad_snlgroups.keys() } })
            first_mismatch_snls_cursor = sma.snl.find({ 'snl_id': { '$in': bad_snlgroups.values() } })
            # extra info for each first-mismatching SNL, keyed by snl_id
            first_mismatch_snl_info = OrderedDict()
            for mpsnl_dict in first_mismatch_snls_cursor:
                mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                first_mismatch_snl_info[mpsnl.snl_id] = _get_snl_extra_info(mpsnl)
            writer = csv.writer(f)
            writer.writerow([
                'snlgroup_id', 'snlgroup_key', 'canonical_snl_id',
                'first_mismatching_snl_id', 'nsites', 'remarks', 'projects', 'authors'
            ])
            for snlgrp_dict in snlgrp_cursor:
                snlgrp = SNLGroup.from_dict(snlgrp_dict)
                first_mismatch_snl_id = bad_snlgroups[snlgrp.snlgroup_id]
                row = [
                    snlgrp.snlgroup_id, snlgrp.canonical_snl.snlgroup_key,
                    snlgrp.canonical_snl.snl_id, first_mismatch_snl_id
                ]
                # merge canonical vs. mismatch info; collapse equal fields
                row += [
                    ' & '.join(pair) if pair[0] != pair[1] else pair[0]
                    for pair in zip(
                        _get_snl_extra_info(snlgrp.canonical_snl),
                        first_mismatch_snl_info[int(first_mismatch_snl_id)]
                    )
                ]
                writer.writerow(row)
module_dir = os.path.dirname(os.path.abspath(__file__)) snl_f = os.path.join(module_dir, 'snl.yaml') snldb = SNLMongoAdapter.from_file(snl_f) all_snl_ids = [] # snl ids that have a group all_missing_ids = [] # snl ids missing a group idx = 0 print 'GETTING GROUPS' for x in snldb.snlgroups.find({}, {"all_snl_ids": 1}): all_snl_ids.extend(x['all_snl_ids']) print 'CHECKING SNL' for x in snldb.snl.find({}, {'snl_id': 1}, timeout=False): print x['snl_id'] if x['snl_id'] not in all_snl_ids: print x['snl_id'], '*********' all_missing_ids.append(x['snl_id']) print 'FIXING / ADDING GROUPS' print all_missing_ids for snl_id in all_missing_ids: try: mpsnl = MPStructureNL.from_dict(snldb.snl.find_one({"snl_id": snl_id})) snldb.build_groups(mpsnl) print 'SUCCESSFUL', snl_id except: print 'ERROR with snl_id', snl_id traceback.print_exc()
module_dir = os.path.dirname(os.path.abspath(__file__)) snl_f = os.path.join(module_dir, 'snl.yaml') snldb = SNLMongoAdapter.from_file(snl_f) all_snl_ids = [] # snl ids that have a group all_missing_ids = [] # snl ids missing a group idx = 0 print 'GETTING GROUPS' for x in snldb.snlgroups.find({}, {"all_snl_ids": 1}): all_snl_ids.extend(x['all_snl_ids']) print 'CHECKING SNL' for x in snldb.snl.find({}, {'snl_id': 1}, timeout=False): print x['snl_id'] if x['snl_id'] not in all_snl_ids: print x['snl_id'], '*********' all_missing_ids.append(x['snl_id']) print 'FIXING / ADDING GROUPS' print all_missing_ids for snl_id in all_missing_ids: try: mpsnl = MPStructureNL.from_dict( snldb.snl.find_one({"snl_id": snl_id})) snldb.build_groups(mpsnl) print 'SUCCESSFUL', snl_id except: print 'ERROR with snl_id', snl_id traceback.print_exc()
def check_snls_in_snlgroups(args):
    """check whether SNLs in each SNLGroup still match resp. canonical SNL

    For every SNLGroup with snlgroup_id in (args.start, args.end], every
    member SNL is structure-matched against the group's canonical structure.
    Results are pushed live to two plotly streams: stream s[0] gets one
    scatter point per bad/errored group, stream s[1] gets a running count of
    good groups.  Relies on module globals: num_ids_per_stream,
    num_snl_streams, num_snlgroups, stream_ids, sma, matcher,
    category_colors, and a project-local sleep() helper (presumably a
    write-rate throttle keyed off the start time -- TODO confirm).
    """
    # Python-2 integer division: maps this worker's start id to a stream row.
    range_index = args.start / num_ids_per_stream
    # Two stream ids per worker: [bad-scatter, good-bar].
    idxs = [2 * (num_snl_streams + range_index)]
    idxs += [idxs[0] + 1]
    s = [py.Stream(stream_ids[i]) for i in idxs]
    for i in range(len(idxs)):
        s[i].open()
    # Clamp the requested range to the number of existing groups.
    end = num_snlgroups if args.end > num_snlgroups else args.end
    id_range = {"$gt": args.start, "$lte": end}
    snlgrp_cursor = sma.snlgroups.find({"snlgroup_id": id_range})
    # NOTE(review): `colors` accumulates across ALL groups and the whole list
    # is resent with every bad-point write -- presumably intentional so the
    # stream keeps the full marker history; verify against plotly streaming
    # semantics.
    colors = []
    num_good_ids = 0
    for snlgrp_dict in snlgrp_cursor:
        start_time = time.clock()
        try:
            snlgrp = SNLGroup.from_dict(snlgrp_dict)
        except:
            # Group itself cannot be deserialized: report under the last
            # ("Others") category and move on.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            text = ' '.join([str(exc_type), str(exc_value)])
            colors.append(category_colors[-1])  # Others
            data = dict(x=snlgrp_dict['snlgroup_id'] % num_ids_per_stream,
                        y=range_index,
                        text=text,
                        marker=Marker(color=colors))
            s[0].write(data)
            sleep(start_time)
            continue
        # Singleton groups are trivially consistent.
        if len(snlgrp.all_snl_ids) <= 1:
            num_good_ids += 1
            data = dict(x=[num_good_ids], y=[range_index])
            s[1].write(data)
            sleep(start_time)
            continue
        exc_raised = False
        all_snls_good = True
        for snl_id in snlgrp.all_snl_ids:
            # The canonical SNL matches itself by definition.
            if snl_id == snlgrp.canonical_snl.snl_id:
                continue
            mpsnl_dict = sma.snl.find_one({"snl_id": snl_id})
            try:
                mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                is_match = matcher.fit(mpsnl.structure,
                                       snlgrp.canonical_structure)
            except:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                exc_raised = True
            # NOTE(review): if exc_raised is True, `is_match` may be unbound
            # here -- safe only because `or` short-circuits.
            if exc_raised or not is_match:  # Scatter (bad)
                if exc_raised:
                    # Category 2 = pybtex (de)serialization errors, 3 = other
                    # exceptions.
                    category = 2 if fnmatch(str(exc_type), '*pybtex*') else 3
                    text = ' '.join([str(exc_type), str(exc_value)])
                else:
                    # Category 0 = genuine structure mismatch.
                    category = 0
                    text = '%d != can:%d' % (mpsnl_dict['snl_id'],
                                             snlgrp.canonical_snl.snl_id)
                colors.append(category_colors[category])
                data = dict(x=snlgrp_dict['snlgroup_id'] % num_ids_per_stream,
                            y=range_index,
                            text=text,
                            marker=Marker(color=colors))
                s[0].write(data)
                all_snls_good = False
                sleep(start_time)
                # First bad member condemns the whole group; skip the rest.
                break
        if all_snls_good:  # Bar (good)
            num_good_ids += 1
            data = dict(x=[num_good_ids], y=[range_index])
            s[1].write(data)
            sleep(start_time)
    for i in range(len(idxs)):
        s[i].close()
def analyze(args):
    """analyze data at any point for a copy of the streaming figure

    Post-processes a snapshot of one of the plotly streaming figures and
    writes CSV reports (and, for some figures, a derived plotly figure).
    Behavior branches on args.fig_id when args.t is set:
      42 -> pairwise SNLGroup duplicate analysis against the materials DB,
      16 -> spacegroup-change report for bad SNLs,
      43 -> SNLGroup member-mismatch re-check,
      otherwise (args.t falsy) -> aggregate error counts from the figure and
      dump bad SNLs/SNLGroups to CSV.
    Relies on module globals: sma, creds, matcher, category_colors,
    categories, num_snl_streams, _get_snl_extra_info, _get_filename.
    """
    # NOTE: make copy online first with suffix _%Y-%m-%d and note figure id
    fig = py.get_figure(creds['username'], args.fig_id)
    if args.t:
        if args.fig_id == 42:
            # The figure's third trace carries '<br>'-separated "(id1, id2)"
            # labels; parse them into snlgroup-id pairs.
            label_entries = filter(
                None, '<br>'.join(fig['data'][2]['text']).split('<br>'))
            pairs = map(make_tuple, label_entries)
            grps = set(chain.from_iterable(pairs))
            # Map snlgroup_id -> snlgroup_key, excluding the CederDahn
            # Challenge project.
            snlgrp_cursor = sma.snlgroups.aggregate([{
                '$match': {
                    'snlgroup_id': {
                        '$in': list(grps)
                    },
                    'canonical_snl.about.projects': {
                        '$ne': 'CederDahn Challenge'
                    }
                }
            }, {
                '$project': {
                    'snlgroup_id': 1,
                    'canonical_snl.snlgroup_key': 1,
                    '_id': 0
                }
            }], cursor={})
            snlgroup_keys = {}
            for d in snlgrp_cursor:
                snlgroup_keys[
                    d['snlgroup_id']] = d['canonical_snl']['snlgroup_key']
            print snlgroup_keys[40890]  # debug probe for a known group id
            # Second adapter pointed at the materials DB (location from env).
            sma2 = SNLMongoAdapter.from_file(
                os.path.join(os.environ['DB_LOC'], 'materials_db.yaml'))
            materials_cursor = sma2.database.materials.aggregate([{
                '$match': {
                    'snlgroup_id_final': {
                        '$in': list(grps)
                    },
                    'snl_final.about.projects': {
                        '$ne': 'CederDahn Challenge'
                    }
                }
            }, {
                '$project': {
                    'snlgroup_id_final': 1,
                    '_id': 0,
                    'task_id': 1,
                    'final_energy_per_atom': 1,
                    'band_gap.search_gap.band_gap': 1,
                    'volume': 1,
                    'nsites': 1
                }
            }], cursor={})
            # Per-group computed properties used for the delta columns below.
            snlgroup_data = {}
            for material in materials_cursor:
                snlgroup_id = material['snlgroup_id_final']
                final_energy_per_atom = material['final_energy_per_atom']
                band_gap = material['band_gap']['search_gap']['band_gap']
                volume_per_atom = material['volume'] / material['nsites']
                snlgroup_data[snlgroup_id] = {
                    'final_energy_per_atom': final_energy_per_atom,
                    'band_gap': band_gap,
                    'task_id': material['task_id'],
                    'volume_per_atom': volume_per_atom
                }
            print snlgroup_data[40890]  # debug probe for a known group id
            # Split output: pairs where both groups are in the materials DB
            # vs. pairs where at least one is not ('wb' is the Python-2 csv
            # convention).
            filestem = 'mpworks/check_snl/results/bad_snlgroups_2_'
            with open(filestem+'in_matdb.csv', 'wb') as f, \
                    open(filestem+'notin_matdb.csv', 'wb') as g:
                writer1, writer2 = csv.writer(f), csv.writer(g)
                header = [
                    'category', 'composition', 'snlgroup_id 1', 'sg_num 1',
                    'task_id 1', 'snlgroup_id 2', 'sg_num 2', 'task_id 2',
                    'delta_energy', 'delta_bandgap', 'delta_volume_per_atom',
                    'rms_dist', 'scenario'
                ]
                writer1.writerow(header)
                writer2.writerow(header)
                for primary_id, secondary_id in pairs:
                    # Skip pairs filtered out above (e.g. CederDahn groups).
                    if primary_id not in snlgroup_keys or \
                            secondary_id not in snlgroup_keys:
                        continue
                    # snlgroup_key format is '<composition>--<sg_num>'.
                    composition, primary_sg_num = snlgroup_keys[
                        primary_id].split('--')
                    secondary_sg_num = snlgroup_keys[secondary_id].split(
                        '--')[1]
                    category = 'same SGs' if primary_sg_num == secondary_sg_num else 'diff. SGs'
                    if primary_id not in snlgroup_data or secondary_id not in snlgroup_data:
                        # At least one group missing from materials DB: no
                        # deltas available.
                        delta_energy, delta_bandgap, delta_volume_per_atom = '', '', ''
                    else:
                        delta_energy = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['final_energy_per_atom'] - \
                            snlgroup_data[secondary_id]['final_energy_per_atom']
                        ))
                        delta_bandgap = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['band_gap'] - \
                            snlgroup_data[secondary_id]['band_gap']
                        ))
                        delta_volume_per_atom = "{0:.3g}".format(abs(
                            snlgroup_data[primary_id]['volume_per_atom'] - \
                            snlgroup_data[secondary_id]['volume_per_atom']
                        ))
                    scenario, rms_dist_str = '', ''
                    # Only for different-SG pairs with data: classify and
                    # compute RMS structural distance.
                    if category == 'diff. SGs' and delta_energy and delta_bandgap:
                        # Thresholds: >10 meV/atom or >0.1 eV gap difference
                        # => 'different' -- TODO confirm units.
                        scenario = 'different' if (
                            float(delta_energy) > 0.01
                            or float(delta_bandgap) > 0.1) else 'similar'
                        snlgrp1_dict = sma.snlgroups.find_one(
                            {"snlgroup_id": primary_id})
                        snlgrp2_dict = sma.snlgroups.find_one(
                            {"snlgroup_id": secondary_id})
                        snlgrp1 = SNLGroup.from_dict(snlgrp1_dict)
                        snlgrp2 = SNLGroup.from_dict(snlgrp2_dict)
                        primary_structure = snlgrp1.canonical_structure
                        secondary_structure = snlgrp2.canonical_structure
                        rms_dist = matcher.get_rms_dist(
                            primary_structure, secondary_structure)
                        if rms_dist is not None:
                            rms_dist_str = "({0:.3g},{1:.3g})".format(
                                *rms_dist)
                            print rms_dist_str
                    row = [
                        category, composition,
                        primary_id, primary_sg_num,
                        snlgroup_data[primary_id]['task_id'] \
                            if primary_id in snlgroup_data else '',
                        secondary_id, secondary_sg_num,
                        snlgroup_data[secondary_id]['task_id'] \
                            if secondary_id in snlgroup_data else '',
                        delta_energy, delta_bandgap, delta_volume_per_atom,
                        rms_dist_str, scenario
                    ]
                    if delta_energy and delta_bandgap:
                        writer1.writerow(row)
                    else:
                        writer2.writerow(row)
        elif args.fig_id == 16:
            # Spacegroup-change report: re-run symmetry analysis on SNLs
            # flagged in the figure and plot old vs. new SG number.
            out_fig = Figure()
            badsnls_trace = Scatter(x=[], y=[], text=[], mode='markers',
                                    name='SG Changes')
            # Diagonal y=x reference line: points off it changed spacegroup.
            bisectrix = Scatter(x=[0, 230], y=[0, 230], mode='lines',
                                name='bisectrix')
            print 'pulling bad snls from plotly ...'
            bad_snls = OrderedDict()
            for category, text in zip(fig['data'][2]['y'],
                                      fig['data'][2]['text']):
                for snl_id in map(int, text.split('<br>')):
                    bad_snls[snl_id] = category
            with open('mpworks/check_snl/results/bad_snls.csv', 'wb') as f:
                print 'pulling bad snls from database ...'
                mpsnl_cursor = sma.snl.find({
                    'snl_id': {
                        '$in': bad_snls.keys()
                    },
                    'about.projects': {
                        '$ne': 'CederDahn Challenge'
                    }
                })
                writer = csv.writer(f)
                writer.writerow([
                    'snl_id', 'category', 'snlgroup_key', 'nsites', 'remarks',
                    'projects', 'authors'
                ])
                print 'writing bad snls to file ...'
                for mpsnl_dict in mpsnl_cursor:
                    mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                    row = [
                        mpsnl.snl_id, bad_snls[mpsnl.snl_id],
                        mpsnl.snlgroup_key
                    ]
                    row += _get_snl_extra_info(mpsnl)
                    writer.writerow(row)
                    sg_num = mpsnl.snlgroup_key.split('--')[1]
                    # Recompute the spacegroup only for 'SG default' entries
                    # that have a real SG, and for explicit 'SG change's.
                    if (bad_snls[mpsnl.snl_id] == 'SG default' and sg_num != '-1') or \
                            bad_snls[mpsnl.snl_id] == 'SG change':
                        mpsnl.structure.remove_oxidation_states()
                        sf = SpacegroupAnalyzer(mpsnl.structure, symprec=0.1)
                        badsnls_trace['x'].append(mpsnl.sg_num)
                        badsnls_trace['y'].append(sf.get_spacegroup_number())
                        badsnls_trace['text'].append(mpsnl.snl_id)
                        if bad_snls[mpsnl.snl_id] == 'SG default':
                            print sg_num, sf.get_spacegroup_number()
            print 'plotting out-fig ...'
            out_fig['data'] = Data([bisectrix, badsnls_trace])
            out_fig['layout'] = Layout(
                showlegend=False,
                hovermode='closest',
                title='Spacegroup Assignment Changes',
                xaxis=XAxis(showgrid=False, title='old SG number',
                            range=[0, 230]),
                yaxis=YAxis(showgrid=False, title='new SG number',
                            range=[0, 230]),
            )
            filename = 'spacegroup_changes_'
            filename += datetime.datetime.now().strftime('%Y-%m-%d')
            py.plot(out_fig, filename=filename, auto_open=False)
        elif args.fig_id == 43:  # SNLGroupMemberChecker
            # Re-verify reported member mismatches with a more permissive
            # matcher (supercell attempts allowed).
            matcher2 = StructureMatcher(ltol=0.2,
                                        stol=0.3,
                                        angle_tol=5,
                                        primitive_cell=False,
                                        scale=True,
                                        attempt_supercell=True,
                                        comparator=ElementComparator())
            print 'pulling data from plotly ...'
            trace = Scatter(x=[], y=[], text=[], mode='markers',
                            name='mismatches')
            bad_snls = OrderedDict()  # snlgroup_id : [ mismatching snl_ids ]
            for category, text in zip(fig['data'][2]['y'],
                                      fig['data'][2]['text']):
                if category != 'mismatch':
                    continue
                # Each entry is '<snlgroup_id,...>:<snl_id,snl_id,...>'.
                for entry in text.split('<br>'):
                    fields = entry.split(':')
                    snlgroup_id = int(fields[0].split(',')[0])
                    print snlgroup_id
                    snlgrp_dict = sma.snlgroups.find_one(
                        {'snlgroup_id': snlgroup_id})
                    snlgrp = SNLGroup.from_dict(snlgrp_dict)
                    s1 = snlgrp.canonical_structure.get_primitive_structure()
                    bad_snls[snlgroup_id] = []
                    for i, snl_id in enumerate(fields[1].split(',')):
                        mpsnl_dict = sma.snl.find_one({'snl_id': int(snl_id)})
                        if 'CederDahn Challenge' in mpsnl_dict['about'][
                                'projects']:
                            print 'skip CederDahn: %s' % snl_id
                            continue
                        mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                        s2 = mpsnl.structure.get_primitive_structure()
                        is_match = matcher2.fit(s1, s2)
                        if is_match:
                            continue
                        # Still a mismatch under the permissive matcher.
                        bad_snls[snlgroup_id].append(snl_id)
                        trace['x'].append(snlgroup_id)
                        trace['y'].append(i + 1)
                        trace['text'].append(snl_id)
                    # Drop groups whose mismatches all resolved.
                    if len(bad_snls[snlgroup_id]) < 1:
                        bad_snls.pop(snlgroup_id, None)
            with open('mpworks/check_snl/results/bad_snlgroups.csv',
                      'wb') as f:
                print 'pulling bad snlgroups from database ...'
                snlgroup_cursor = sma.snlgroups.find({
                    'snlgroup_id': {
                        '$in': bad_snls.keys()
                    },
                })
                writer = csv.writer(f)
                writer.writerow(
                    ['snlgroup_id', 'snlgroup_key', 'mismatching snl_ids'])
                print 'writing bad snlgroups to file ...'
                for snlgroup_dict in snlgroup_cursor:
                    snlgroup = SNLGroup.from_dict(snlgroup_dict)
                    row = [
                        snlgroup.snlgroup_id,
                        snlgroup.canonical_snl.snlgroup_key,
                        ' '.join(bad_snls[snlgroup.snlgroup_id])
                    ]
                    writer.writerow(row)
            print 'plotting out-fig ...'
            out_fig = Figure()
            out_fig['data'] = Data([trace])
            out_fig['layout'] = Layout(
                showlegend=False,
                hovermode='closest',
                title='Member Mismatches of SNLGroup Canonicals',
                xaxis=XAxis(showgrid=False, title='snlgroup_id',
                            showexponent='none'),
                yaxis=YAxis(showgrid=False, title='# mismatching SNLs'),
            )
            filename = 'groupmember_mismatches_'
            filename += datetime.datetime.now().strftime('%Y-%m-%d')
            py.plot(out_fig, filename=filename, auto_open=False)
    else:
        # Snapshot mode: tally error categories from the figure's scatter
        # traces and dump bad SNLs / SNLGroups to CSV.
        errors = Counter()
        bad_snls = OrderedDict()
        bad_snlgroups = OrderedDict()
        for i, d in enumerate(fig['data']):
            if not isinstance(d, Scatter):
                continue
            if not 'x' in d or not 'y' in d or not 'text' in d:
                continue
            # Trace names look like '<k>k - ...'; recover the id offset.
            start_id = int(d['name'].split(' - ')[0][:-1]) * 1000
            marker_colors = d['marker']['color']
            if i < 2 * num_snl_streams:  # spacegroups
                errors += Counter(marker_colors)
                for idx, color in enumerate(marker_colors):
                    snl_id = start_id + d['x'][idx]
                    # Marker color encodes the error category.
                    color_index = category_colors.index(color)
                    category = categories[color_index]
                    bad_snls[snl_id] = category
            else:  # groupmembers
                for idx, color in enumerate(marker_colors):
                    # Only category-0 (mismatch) points are of interest here.
                    if color != category_colors[0]:
                        continue
                    snlgroup_id = start_id + d['x'][idx]
                    mismatch_snl_id, canonical_snl_id = d['text'][idx].split(
                        ' != ')
                    bad_snlgroups[snlgroup_id] = int(mismatch_snl_id)
        print errors
        # Overwrite the summary trace's x values with per-category counts.
        fig_data = fig['data'][-1]
        fig_data['x'] = [
            errors[color] for color in fig_data['marker']['color']
        ]
        filename = _get_filename()
        print filename
        #py.plot(fig, filename=filename)
        with open('mpworks/check_snl/results/bad_snls.csv', 'wb') as f:
            mpsnl_cursor = sma.snl.find({'snl_id': {'$in': bad_snls.keys()}})
            writer = csv.writer(f)
            writer.writerow([
                'snl_id', 'category', 'snlgroup_key', 'nsites', 'remarks',
                'projects', 'authors'
            ])
            for mpsnl_dict in mpsnl_cursor:
                mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                row = [
                    mpsnl.snl_id, bad_snls[mpsnl.snl_id], mpsnl.snlgroup_key
                ]
                row += _get_snl_extra_info(mpsnl)
                writer.writerow(row)
        with open('mpworks/check_snl/results/bad_snlgroups.csv', 'wb') as f:
            snlgrp_cursor = sma.snlgroups.find(
                {'snlgroup_id': {
                    '$in': bad_snlgroups.keys()
                }})
            first_mismatch_snls_cursor = sma.snl.find(
                {'snl_id': {
                    '$in': bad_snlgroups.values()
                }})
            # Cache the extra-info tuple of each first-mismatching SNL so the
            # comparison loop below is a dict lookup.
            first_mismatch_snl_info = OrderedDict()
            for mpsnl_dict in first_mismatch_snls_cursor:
                mpsnl = MPStructureNL.from_dict(mpsnl_dict)
                first_mismatch_snl_info[mpsnl.snl_id] = _get_snl_extra_info(
                    mpsnl)
            writer = csv.writer(f)
            writer.writerow([
                'snlgroup_id', 'snlgroup_key', 'canonical_snl_id',
                'first_mismatching_snl_id', 'nsites', 'remarks', 'projects',
                'authors'
            ])
            for snlgrp_dict in snlgrp_cursor:
                snlgrp = SNLGroup.from_dict(snlgrp_dict)
                first_mismatch_snl_id = bad_snlgroups[snlgrp.snlgroup_id]
                row = [
                    snlgrp.snlgroup_id, snlgrp.canonical_snl.snlgroup_key,
                    snlgrp.canonical_snl.snl_id, first_mismatch_snl_id
                ]
                # Show 'canonical & mismatch' where the extra-info fields
                # differ, otherwise the shared value.
                row += [
                    ' & '.join(pair) if pair[0] != pair[1] else pair[0]
                    for pair in zip(
                        _get_snl_extra_info(snlgrp.canonical_snl),
                        first_mismatch_snl_info[int(first_mismatch_snl_id)])
                ]
                writer.writerow(row)