Ejemplo n.º 1
0
    def get_items(self, snls=None, snlgroups=None, ncols=None):
        """iterator over same-composition groups of SNLGroups rev-sorted by size

        :param snls: 'snl' collection in 'snl_mp_prod' DB
        :type snls: QueryEngine
        :param snlgroups: 'snlgroups' collection in 'snl_mp_prod' DB
        :type snlgroups: QueryEngine
        :param ncols: number of columns for 2D plotly
        :type ncols: int
        """
        # structure-equivalence matcher (element-only comparison on
        # primitive cells)
        self._matcher = StructureMatcher(
            ltol=0.2, stol=0.3, angle_tol=5, primitive_cell=True,
            scale=True, attempt_supercell=False,
            comparator=ElementComparator())
        # multiprocess-shared state is only set up when not running
        # sequentially
        parallel = not self._seq
        self._lock = self._mgr.Lock() if parallel else None
        self._ncols = ncols if parallel else 1
        self._nrows = div_plus_mod(self._ncores, self._ncols) if parallel else 1
        # 2D grid of per-cell counters plus one global total
        self._counter = self.shared_list()
        self._counter.extend([[0] * self._ncols for _ in range(self._nrows)])
        self._counter_total = multiprocessing.Value('d', 0)
        # per-category mismatch bookkeeping for this checker
        self._mismatch_dict = self.shared_dict()
        self._mismatch_dict.update(
            {category: [] for category in categories[self.checker_name]})
        self._mismatch_counter = self.shared_list()
        self._mismatch_counter.extend([0] * len(self._mismatch_dict.keys()))
        # open one plotly stream per configured stream id, if plotly is
        # available
        if py is not None:
            self._streams = [py.Stream(sid) for sid in stream_ids]
            for stream in self._streams:
                stream.open()
        self._snls = snls
        self._snlgroups = snlgroups
        if 'SNLGroup' in self.checker_name:
            _log.info('analyzing %d SNLGroups',
                      self._snlgroups.collection.count())
            # aggregate snlgroup_ids by reduced formula, keep only formulas
            # shared by more than one group, largest groups first
            pipeline = [
                {'$project': {
                    'reduced_cell_formula_abc': 1, 'snlgroup_id': 1, '_id': 0}},
                {'$group': {
                    '_id': '$reduced_cell_formula_abc',
                    'num_snlgroups': {'$sum': 1},
                    'snlgroup_ids': {'$addToSet': "$snlgroup_id"}}},
                {'$match': {'num_snlgroups': {'$gt': 1}}},
                {'$sort': {'num_snlgroups': -1}},
                {'$project': {'snlgroup_ids': 1}},
            ]
            return self._snlgroups.collection.aggregate(pipeline, cursor={})
        else:
            _log.info('analyzing %d SNLs', snls.collection.count())
            return self._snls.query(distinct_key='snl_id')
Ejemplo n.º 2
0
    def get_items(self, snls=None, snlgroups=None, ncols=None):
        """iterator over same-composition groups of SNLGroups rev-sorted by size

        :param snls: 'snl' collection in 'snl_mp_prod' DB
        :type snls: QueryEngine
        :param snlgroups: 'snlgroups' collection in 'snl_mp_prod' DB
        :type snlgroups: QueryEngine
        :param ncols: number of columns for 2D plotly
        :type ncols: int
        """
        # matcher deciding structural equivalence (element comparison on
        # primitive cells)
        self._matcher = StructureMatcher(ltol=0.2, stol=0.3, angle_tol=5,
                                         primitive_cell=True, scale=True,
                                         attempt_supercell=False,
                                         comparator=ElementComparator())
        # sequential mode collapses the counter grid to a single cell and
        # needs no lock
        if self._seq:
            self._lock = None
            self._ncols = 1
            self._nrows = 1
        else:
            self._lock = self._mgr.Lock()
            self._ncols = ncols
            self._nrows = div_plus_mod(self._ncores, self._ncols)
        # per-cell counter grid plus a process-shared running total
        self._counter = self.shared_list()
        self._counter.extend([[0] * self._ncols for _ in range(self._nrows)])
        self._counter_total = multiprocessing.Value('d', 0)
        # one (initially empty) mismatch list per category of this checker
        self._mismatch_dict = self.shared_dict()
        empty_lists = dict(
            (name, []) for name in categories[self.checker_name])
        self._mismatch_dict.update(empty_lists)
        self._mismatch_counter = self.shared_list()
        self._mismatch_counter.extend([0] * len(self._mismatch_dict.keys()))
        # create all plotly streams first, then open them
        if py is not None:
            self._streams = []
            for stream_id in stream_ids:
                self._streams.append(py.Stream(stream_id))
            for stream in self._streams:
                stream.open()
        self._snls = snls
        self._snlgroups = snlgroups
        if 'SNLGroup' not in self.checker_name:
            _log.info('analyzing %d SNLs', snls.collection.count())
            return self._snls.query(distinct_key='snl_id')
        _log.info('analyzing %d SNLGroups',
                  self._snlgroups.collection.count())
        # build the aggregation stage by stage: strip to the two fields of
        # interest, group snlgroup_ids per reduced formula, keep formulas
        # with more than one group, largest first
        project_stage = {
            '$project': {
                'reduced_cell_formula_abc': 1,
                'snlgroup_id': 1,
                '_id': 0
            }
        }
        group_stage = {
            '$group': {
                '_id': '$reduced_cell_formula_abc',
                'num_snlgroups': {'$sum': 1},
                'snlgroup_ids': {'$addToSet': "$snlgroup_id"}
            }
        }
        match_stage = {'$match': {'num_snlgroups': {'$gt': 1}}}
        sort_stage = {'$sort': {'num_snlgroups': -1}}
        final_stage = {'$project': {'snlgroup_ids': 1}}
        pipeline = [project_stage, group_stage, match_stage, sort_stage,
                    final_stage]
        return self._snlgroups.collection.aggregate(pipeline, cursor={})
Ejemplo n.º 3
0
# Matcher used to decide whether two SNL structures are equivalent
# (element-only comparison on scaled primitive cells).
matcher = StructureMatcher(ltol=0.2,
                           stol=0.3,
                           angle_tol=5,
                           primitive_cell=True,
                           scale=True,
                           attempt_supercell=False,
                           comparator=ElementComparator())

# Sizing for plotly streams and check jobs. Python 2 integer division:
# num_ids_per_stream_k == 20 (stream size in thousands of ids).
num_ids_per_stream = 20000
num_ids_per_stream_k = num_ids_per_stream / 1000
num_snls = sma.snl.count()
num_snlgroups = sma.snlgroups.count()
num_pairs_per_job = 1000 * num_ids_per_stream
# number of unordered SNLGroup pairs: n*(n-1)/2 (always an exact integer)
num_pairs_max = num_snlgroups * (num_snlgroups - 1) / 2

# NOTE(review): div_plus_mod presumably rounds up (ceil-style division) so
# the last stream/job covers the remainder -- confirm its definition.
num_snl_streams = div_plus_mod(num_snls, num_ids_per_stream)
num_snlgroup_streams = div_plus_mod(num_snlgroups, num_ids_per_stream)
num_jobs = div_plus_mod(num_pairs_max, num_pairs_per_job)
# Python 2 print statement
print num_snl_streams, num_snlgroup_streams, num_jobs

# Names of the individual SNL group checks, the mismatch categories, and
# one plot color per category.
checks = ['spacegroups', 'groupmembers', 'canonicals']
categories = ['SG Change', 'SG Default', 'PybTeX', 'Others']
num_categories = len(categories)
category_colors = ['red', 'blue', 'green', 'orange']


def _get_filename(day=True):
    filename = 'snl_group_check_'
    filename += datetime.datetime.now().strftime(
        '%Y-%m-%d') if day else 'stream'
    return filename
Ejemplo n.º 4
0
# Minimum delay between consecutive plotly stream writes (seconds).
# NOTE(review): presumably dictated by the plotly streaming rate limit --
# confirm.
min_sleep = 0.052

sma = SNLMongoAdapter.auto_load()
# Matcher deciding structural equivalence of SNLs (element-only
# comparison on scaled primitive cells).
matcher = StructureMatcher(
    ltol=0.2, stol=0.3, angle_tol=5, primitive_cell=True, scale=True,
    attempt_supercell=False, comparator=ElementComparator()
)

# Stream/job sizing. Python 2 integer division: 20000/1000 == 20.
num_ids_per_stream = 20000
num_ids_per_stream_k = num_ids_per_stream/1000
num_snls = sma.snl.count()
num_snlgroups = sma.snlgroups.count()
num_pairs_per_job = 1000 * num_ids_per_stream
# unordered pair count n*(n-1)/2 (always an exact integer)
num_pairs_max = num_snlgroups*(num_snlgroups-1)/2

# NOTE(review): div_plus_mod presumably performs ceil-style division so
# the remainder gets its own stream/job -- confirm its definition.
num_snl_streams = div_plus_mod(num_snls, num_ids_per_stream)
num_snlgroup_streams = div_plus_mod(num_snlgroups, num_ids_per_stream)
num_jobs = div_plus_mod(num_pairs_max, num_pairs_per_job)
# Python 2 print statement
print num_snl_streams, num_snlgroup_streams, num_jobs

# Check names, mismatch categories, and per-category plot colors.
checks = ['spacegroups', 'groupmembers', 'canonicals']
categories = [ 'SG Change', 'SG Default', 'PybTeX', 'Others' ]
num_categories = len(categories)
category_colors = ['red', 'blue', 'green', 'orange']

def _get_filename(day=True):
    filename = 'snl_group_check_'
    filename += datetime.datetime.now().strftime('%Y-%m-%d') if day else 'stream'
    return filename

def _get_shades_of_gray(num_colors):