def get_items(self, snls=None, snlgroups=None, ncols=None):
    """Iterator over same-composition groups of SNLGroups rev-sorted by size.

    :param snls: 'snl' collection in 'snl_mp_prod' DB
    :type snls: QueryEngine
    :param snlgroups: 'snlgroups' collection in 'snl_mp_prod' DB
    :type snlgroups: QueryEngine
    :param ncols: number of columns for 2D plotly
    :type ncols: int
    """
    self._matcher = StructureMatcher(
        ltol=0.2, stol=0.3, angle_tol=5, primitive_cell=True, scale=True,
        attempt_supercell=False, comparator=ElementComparator()
    )
    # In sequential mode there is nothing to synchronize and the 2D
    # counter grid collapses to a single cell.
    self._lock = self._mgr.Lock() if not self._seq else None
    self._ncols = ncols if not self._seq else 1
    self._nrows = div_plus_mod(self._ncores, self._ncols) if not self._seq else 1
    self._counter = self.shared_list()
    self._counter.extend([[0] * self._ncols for _ in range(self._nrows)])
    self._counter_total = multiprocessing.Value('d', 0)
    self._mismatch_dict = self.shared_dict()
    # one empty bucket per mismatch category for this checker
    self._mismatch_dict.update({k: [] for k in categories[self.checker_name]})
    self._mismatch_counter = self.shared_list()
    self._mismatch_counter.extend([0] * len(self._mismatch_dict))
    if py is not None:
        # open one plotly stream per configured stream id
        self._streams = [py.Stream(stream_id) for stream_id in stream_ids]
        for s in self._streams:
            s.open()
    self._snls = snls
    self._snlgroups = snlgroups
    if 'SNLGroup' in self.checker_name:
        _log.info('analyzing %d SNLGroups',
                  self._snlgroups.collection.count())
        # Aggregation pipeline: bucket SNLGroups by reduced composition,
        # keep only compositions with more than one group, largest first.
        pipeline = [
            {'$project': {
                'reduced_cell_formula_abc': 1, 'snlgroup_id': 1, '_id': 0}},
            {'$group': {
                '_id': '$reduced_cell_formula_abc',
                'num_snlgroups': {'$sum': 1},
                'snlgroup_ids': {'$addToSet': "$snlgroup_id"},
            }},
            {'$match': {'num_snlgroups': {'$gt': 1}}},
            {'$sort': {'num_snlgroups': -1}},
            {'$project': {'snlgroup_ids': 1}},
        ]
        return self._snlgroups.collection.aggregate(pipeline, cursor={})
    else:
        # use the attribute assigned above, consistent with the branch above
        _log.info('analyzing %d SNLs', self._snls.collection.count())
        return self._snls.query(distinct_key='snl_id')
def get_items(self, snls=None, snlgroups=None, ncols=None):
    """Iterator over same-composition groups of SNLGroups rev-sorted by size.

    :param snls: 'snl' collection in 'snl_mp_prod' DB
    :type snls: QueryEngine
    :param snlgroups: 'snlgroups' collection in 'snl_mp_prod' DB
    :type snlgroups: QueryEngine
    :param ncols: number of columns for 2D plotly
    :type ncols: int
    """
    self._matcher = StructureMatcher(
        ltol=0.2, stol=0.3, angle_tol=5, primitive_cell=True,
        scale=True, attempt_supercell=False,
        comparator=ElementComparator())
    seq = self._seq
    # parallel mode needs a lock and a ncols x nrows progress grid;
    # sequential mode degenerates to a 1x1 grid with no lock
    self._lock = None if seq else self._mgr.Lock()
    self._ncols = 1 if seq else ncols
    self._nrows = 1 if seq else div_plus_mod(self._ncores, self._ncols)
    self._counter = self.shared_list()
    self._counter.extend([[0] * self._ncols for i in range(self._nrows)])
    self._counter_total = multiprocessing.Value('d', 0)
    self._mismatch_dict = self.shared_dict()
    self._mismatch_dict.update(
        dict((cat, []) for cat in categories[self.checker_name]))
    self._mismatch_counter = self.shared_list()
    self._mismatch_counter.extend([0] * len(self._mismatch_dict.keys()))
    if py is not None:
        # one plotly stream per configured id, opened up front
        self._streams = []
        for sid in stream_ids:
            self._streams.append(py.Stream(sid))
        for stream in self._streams:
            stream.open()
    self._snls = snls
    self._snlgroups = snlgroups
    if 'SNLGroup' not in self.checker_name:
        # plain SNL check: iterate distinct snl_ids
        _log.info('analyzing %d SNLs', snls.collection.count())
        return self._snls.query(distinct_key='snl_id')
    _log.info('analyzing %d SNLGroups', self._snlgroups.collection.count())
    # group SNLGroups by composition, drop singletons, biggest buckets first
    group_expression = {
        '_id': '$reduced_cell_formula_abc',
        'num_snlgroups': {'$sum': 1},
        'snlgroup_ids': {'$addToSet': "$snlgroup_id"},
    }
    pipeline = [
        {'$project': {
            'reduced_cell_formula_abc': 1, 'snlgroup_id': 1, '_id': 0}},
        {'$group': group_expression},
        {'$match': {'num_snlgroups': {'$gt': 1}}},
        {'$sort': {'num_snlgroups': -1}},
        {'$project': {'snlgroup_ids': 1}},
    ]
    return self._snlgroups.collection.aggregate(pipeline, cursor={})
matcher = StructureMatcher(ltol=0.2, stol=0.3, angle_tol=5, primitive_cell=True, scale=True, attempt_supercell=False, comparator=ElementComparator()) num_ids_per_stream = 20000 num_ids_per_stream_k = num_ids_per_stream / 1000 num_snls = sma.snl.count() num_snlgroups = sma.snlgroups.count() num_pairs_per_job = 1000 * num_ids_per_stream num_pairs_max = num_snlgroups * (num_snlgroups - 1) / 2 num_snl_streams = div_plus_mod(num_snls, num_ids_per_stream) num_snlgroup_streams = div_plus_mod(num_snlgroups, num_ids_per_stream) num_jobs = div_plus_mod(num_pairs_max, num_pairs_per_job) print num_snl_streams, num_snlgroup_streams, num_jobs checks = ['spacegroups', 'groupmembers', 'canonicals'] categories = ['SG Change', 'SG Default', 'PybTeX', 'Others'] num_categories = len(categories) category_colors = ['red', 'blue', 'green', 'orange'] def _get_filename(day=True): filename = 'snl_group_check_' filename += datetime.datetime.now().strftime( '%Y-%m-%d') if day else 'stream' return filename
min_sleep = 0.052 sma = SNLMongoAdapter.auto_load() matcher = StructureMatcher( ltol=0.2, stol=0.3, angle_tol=5, primitive_cell=True, scale=True, attempt_supercell=False, comparator=ElementComparator() ) num_ids_per_stream = 20000 num_ids_per_stream_k = num_ids_per_stream/1000 num_snls = sma.snl.count() num_snlgroups = sma.snlgroups.count() num_pairs_per_job = 1000 * num_ids_per_stream num_pairs_max = num_snlgroups*(num_snlgroups-1)/2 num_snl_streams = div_plus_mod(num_snls, num_ids_per_stream) num_snlgroup_streams = div_plus_mod(num_snlgroups, num_ids_per_stream) num_jobs = div_plus_mod(num_pairs_max, num_pairs_per_job) print num_snl_streams, num_snlgroup_streams, num_jobs checks = ['spacegroups', 'groupmembers', 'canonicals'] categories = [ 'SG Change', 'SG Default', 'PybTeX', 'Others' ] num_categories = len(categories) category_colors = ['red', 'blue', 'green', 'orange'] def _get_filename(day=True): filename = 'snl_group_check_' filename += datetime.datetime.now().strftime('%Y-%m-%d') if day else 'stream' return filename def _get_shades_of_gray(num_colors):