Example #1
0
 def connect(self, **kwargs):
     """Connect through the parent class, then attach the contribution helpers.

     Once the parent ``connect`` has returned, ``self.contribs_db`` is set to
     ``self.default_db``. If a ``db_type`` keyword argument is present and not
     ``None``, it is replaced by ``self.get_database(db_type)``. A
     ``ContributionMongoAdapter`` and an ``MPContributionsBuilder`` are then
     created on that database and stored as ``self.contrib_ad`` and
     ``self.contrib_build_ad``.

     :param kwargs: forwarded unchanged to the parent ``connect``; the
         optional ``db_type`` entry is additionally read here.
     """
     super(Connector, self).connect(**kwargs)

     # Start from the default database and only switch when asked to.
     self.contribs_db = self.default_db
     requested_type = kwargs.get('db_type')
     if requested_type is not None:
         self.contribs_db = self.get_database(requested_type)

     # Both helper classes are imported at call time, right before use.
     from mpcontribs.rest.adapter import ContributionMongoAdapter
     self.contrib_ad = ContributionMongoAdapter(self.contribs_db)

     from mpcontribs.builder import MPContributionsBuilder
     self.contrib_build_ad = MPContributionsBuilder(self.contribs_db)
import sys, json, os
from pympler import asizeof

# Script entry point: make sure 'martin_lab.json' exists (generating it if
# necessary), then load it and walk over its notebook cells.
if __name__ == '__main__':

    # Only regenerate the JSON file when it is not already on disk.
    if not os.path.exists('martin_lab.json'):
        from mpcontribs.users.martin_lab.pre_submission import run
        from mpcontribs.io.archieml.mpfile import MPFile
        from mpcontribs.rest.adapter import ContributionMongoAdapter
        from mpcontribs.builder import MPContributionsBuilder, export_notebook

        # Load the initial MPFile and hand it to the project's pre-submission
        # `run` function before splitting it up.
        mpfile = MPFile.from_file('MPContribs/mpcontribs/users/martin_lab/mpfile_init.txt')
        run(mpfile)
        cma = ContributionMongoAdapter()
        # Submit and build every piece returned by `mpfile.split()`.
        for mpfile_single in mpfile.split():
            contributor = 'Patrick Huck <*****@*****.**>'
            doc = cma.submit_contribution(mpfile_single, contributor)
            cid = doc['_id']
            print doc.keys()
            mcb = MPContributionsBuilder(doc)
            build_doc = mcb.build(contributor, cid)
            # The last element of the build result is what gets saved.
            nb = build_doc[-1]
            print nb.keys()
            # NOTE(review): the file is reopened in 'w' mode on every
            # iteration, so it ends up holding only the last contribution's
            # data -- confirm this is intended.
            with open('martin_lab.json', 'w') as f:
                json.dump(nb, f)
            print 'DONE'

    # Read the (possibly just written) JSON back and iterate over its cells.
    with open('martin_lab.json', 'r') as f:
        nb = json.load(f)
        for idx, cell in enumerate(nb['cells']):
            if idx: # skip first cell
Example #3
0
import sys, json, os
from pympler import asizeof

# Generate 'martin_lab.json' only when it is not already on disk.
if not os.path.exists('martin_lab.json'):
    from mpcontribs.users.martin_lab.pre_submission import run
    from mpcontribs.io.archieml.mpfile import MPFile
    from mpcontribs.rest.adapter import ContributionMongoAdapter
    from mpcontribs.builder import MPContributionsBuilder, export_notebook

    # Load the initial MPFile and hand it to the project's pre-submission
    # `run` function before splitting it up.
    mpfile = MPFile.from_file('MPContribs/mpcontribs/users/martin_lab/mpfile_init.txt')
    run(mpfile)
    cma = ContributionMongoAdapter()
    # The contributor string is the same for every contribution.
    contributor = 'Patrick Huck <*****@*****.**>'
    for mpfile_single in mpfile.split():
        doc = cma.submit_contribution(mpfile_single, contributor)
        cid = doc['_id']
        # Single-argument print(...) is valid on both Python 2 and 3;
        # list(...) keeps the printed form identical on both.
        print(list(doc.keys()))
        mcb = MPContributionsBuilder(doc)
        build_doc = mcb.build(contributor, cid)
        # The last element of the build result is what gets saved.
        nb = build_doc[-1]
        print(list(nb.keys()))
        # NOTE(review): the file is reopened in 'w' mode on every iteration,
        # so it ends up holding only the last contribution's data -- confirm
        # this is intended.
        with open('martin_lab.json', 'w') as f:
            json.dump(nb, f)
        print('DONE')

# Load the notebook JSON from disk and look at the in-memory size of each
# cell after the first one.
with open('martin_lab.json', 'r') as f:
    nb = json.load(f)
    for idx, cell in enumerate(nb['cells']):
        if idx: # skip first cell
            # asizeof result divided by 1024 twice; presumably bytes -> MiB
            # (assumes pympler's asizeof reports bytes -- TODO confirm).
            obj_size = asizeof.asizeof(cell) / 1024. / 1024.
            if obj_size > 1.:
Example #4
0
def process_mpfile(path_or_mpfile, target=None, fmt='archieml'):
    """Process an MPFile contribution by contribution, yielding progress.

    This is a generator. It yields HTML-flavoured status strings while it
    works. When ``target`` is ``None`` it additionally yields the build
    result of every contribution and, at the end, the collected overview
    data: a dict mapping dotted key paths to
    ``{cid_short: (float_value, mp_cat_id)}``, restricted to the key paths
    kept in ``axes``.

    :param path_or_mpfile: passed to ``MPFile.from_file``; if it is a string
        it must name an existing file.
    :param target: optional object providing ``submit_contribution``,
        ``build_contribution`` and ``preamble``. When given, every
        contribution is also submitted to and built on it; when ``None``
        the contribution is built locally with ``MPContributionsBuilder``.
    :param fmt: name of the ``mpcontribs.io.<fmt>.mpfile`` module that
        supplies the ``MPFile`` class.

    Any ``Exception`` raised while processing is not propagated; instead
    ``'FAILED.</br>'`` and then the exception message (double quotes
    replaced by single quotes) are yielded and the generator ends.
    """
    try:
        if isinstance(path_or_mpfile, six.string_types) and \
           not os.path.isfile(path_or_mpfile):
            raise Exception('{} not found'.format(path_or_mpfile))
        mod = import_module('mpcontribs.io.{}.mpfile'.format(fmt))
        MPFile = getattr(mod, 'MPFile')
        full_name = pwd.getpwuid(os.getuid())[4]
        contributor = '{} <*****@*****.**>'.format(full_name)  # fake
        cma = ContributionMongoAdapter()
        axes, ov_data = set(), dict()
        # split input MPFile into contributions: treat every mp_cat_id as separate DB insert
        mpfile, cid_shorts = MPFile.from_dict(), []  # output
        for idx, mpfile_single in enumerate(
                MPFile.from_file(path_or_mpfile).split()):
            # list(...) so the first key can be indexed on Python 3 as well
            mp_cat_id = list(mpfile_single.document.keys())[0]
            # TODO test update mode
            cid = mpfile_single.document[mp_cat_id].get('cid', None)
            if cid is not None:
                cid_short = get_short_object_id(cid)
                yield 'use contribution #{} to update ID #{} ... '.format(
                    idx, cid_short)
            # always run local "submission" to catch failure before interacting with DB
            yield 'locally process contribution #{} ... '.format(idx)
            doc = cma.submit_contribution(
                mpfile_single, contributor)  # does not use get_string
            cid = doc['_id']
            yield 'check consistency ... '
            # round-trip through the string representation and compare
            mpfile_single_cmp = MPFile.from_string(mpfile_single.get_string())
            if mpfile_single.document != mpfile_single_cmp.document:
                # NOTE: this assigns a module attribute of json.encoder and
                # therefore stays in effect after this function returns.
                json.encoder.FLOAT_REPR = lambda o: format(o, 'g')
                # compare json strings to find first inconsistency
                for a, b in zip(
                        json.dumps(mpfile_single.document,
                                   indent=4).split('\n'),
                        json.dumps(mpfile_single_cmp.document,
                                   indent=4).split('\n')):
                    if a != b:
                        raise Exception('{} <====> {}'.format(
                            a.strip(), b.strip()))
            if target is not None:
                yield 'submit to MP ... '
                cid = target.submit_contribution(mpfile_single,
                                                 fmt)  # uses get_string
            cid_short = get_short_object_id(cid)
            mpfile_single.insert_id(mp_cat_id, cid)
            cid_shorts.append(cid_short)
            yield 'build notebook ... '
            if target is not None:
                url = target.build_contribution(cid)
                url = '/'.join(
                    [target.preamble.rsplit('/', 1)[0], 'explorer', url])
                yield (
                    "OK. <a href='{}' class='btn btn-default btn-xs' " +
                    "role='button' target='_blank'>View</a></br>").format(url)
            else:
                mcb = MPContributionsBuilder(doc)
                build_doc = mcb.build(contributor, cid)
                yield build_doc
                yield 'determine overview axes ... '
                # `scope` tracks the current key path while walking the
                # hierarchical data; `local_axes` collects the numeric key
                # paths seen in this contribution.
                scope, local_axes = [], set()
                mpfile_for_axes = MPFile.from_contribution(doc)
                for k, v in mpfile_for_axes.hdata[mp_cat_id].iterate():
                    if v is None:
                        scope = scope[:k[0]]
                        scope.append(k[1])
                    else:
                        try:
                            if k[0] == len(scope):
                                scope.append(k[1])
                            else:
                                scope[-1] = k[1]
                            vf = float(v)  # trigger exception
                            scope_str = '.'.join(scope)
                            if idx == 0:
                                axes.add(scope_str)
                                ov_data[scope_str] = {
                                    cid_short: (vf, mp_cat_id)
                                }
                            else:
                                local_axes.add(scope_str)
                                ov_data[scope_str][cid_short] = (vf, mp_cat_id)
                        except Exception:
                            # best effort: entries that are not numeric (or
                            # otherwise unusable) are simply left out
                            pass
                if idx > 0:
                    # keep only key paths shared with this contribution
                    axes.intersection_update(local_axes)
                yield 'OK.</br>'
            mpfile.concat(mpfile_single)
            time.sleep(.01)
        ncontribs = len(cid_shorts)
        #if target is not None and \
        #   isinstance(path_or_mpfile, six.string_types) and \
        #   os.path.isfile(path_or_mpfile):
        #    yield 'embed #{} in MPFile ...'.format('/'.join(cid_shorts))
        #    mpfile.write_file(path_or_mpfile, with_comments=True)
        if target is not None:
            yield '<strong>{} contributions successfully submitted.</strong>'.format(
                ncontribs)
        else:
            # iterate over a snapshot of the keys: popping while iterating
            # the dict itself raises RuntimeError
            for k in list(ov_data):
                if k not in axes:
                    ov_data.pop(k)
            yield ov_data
            yield '<strong>{} contributions successfully processed.</strong>'.format(
                ncontribs)
    except Exception as ex:
        # `Exception` (not a bare except) so that GeneratorExit from an early
        # close() of this generator is not answered with further yields.
        yield 'FAILED.</br>'
        yield str(ex).replace('"', "'")
Example #5
0
def process_mpfile(path_or_mpfile, target=None, fmt='archieml', ids=None):
    """Process an MPFile contribution by contribution, yielding progress.

    This is a generator. It yields HTML-flavoured status strings while it
    works. When ``target`` is ``None`` it additionally yields a build result
    per processed contribution and, at the end, the collected overview data:
    a dict mapping dotted key paths to
    ``{cid_short: (float_value, mp_cat_id)}``, restricted to the key paths
    kept in ``axes``.

    :param path_or_mpfile: a path string or ``StringIO`` (both loaded via
        ``MPFile.from_file``), or an object that already provides
        ``split()``. A string must name an existing file.
    :param target: optional object providing ``submit_contribution``,
        ``build_contribution``, ``set_build_flag`` and ``preamble``. When
        given, contributions are also submitted to it; the first five are
        built right away, later ones only get their build flag set.
    :param fmt: name of the ``mpcontribs.io.<fmt>.mpfile`` module that
        supplies the ``MPFile`` class.
    :param ids: optional two-item list (a tuple is accepted as well)
        ``[mp_cat_id, cid_short]``. When given, only the contribution whose
        ``mp_cat_id`` equals ``ids[0]`` is processed, and only if the short
        ID of its local submission equals ``ids[1]``; otherwise
        ``'wrong CID.</br>'`` is yielded for it.

    Any ``Exception`` raised while processing is not propagated; instead
    ``'FAILED.</br>'`` and then the exception message (double quotes
    replaced by single quotes) are yielded and the generator ends.
    """
    try:
        # Reject anything that is not a two-item sequence. The condition has
        # to be "not a list OR wrong length"; with `and` a list of the wrong
        # length was never rejected.
        if ids is not None and (
                not isinstance(ids, (list, tuple)) or len(ids) != 2):
            raise Exception('{} is not list of length 2!'.format(ids))

        if isinstance(path_or_mpfile, six.string_types) and \
           not os.path.isfile(path_or_mpfile):
            raise Exception('{} not found'.format(path_or_mpfile))

        from pymatgen.analysis.structure_matcher import StructureMatcher
        mod = import_module('mpcontribs.io.{}.mpfile'.format(fmt))
        MPFile = getattr(mod, 'MPFile')
        full_name = pwd.getpwuid(os.getuid())[4]
        contributor = '{} <*****@*****.**>'.format(full_name)  # fake
        cma = ContributionMongoAdapter()
        axes, ov_data = set(), dict()
        mpfile_out, cid_shorts = MPFile(), []  # output
        sm = StructureMatcher(primitive_cell=False, scale=False)

        # split input MPFile into contributions: treat every mp_cat_id as separate DB insert
        mpfile_in = path_or_mpfile
        if isinstance(path_or_mpfile, six.string_types) or isinstance(
                path_or_mpfile, StringIO):
            mpfile_in = MPFile.from_file(path_or_mpfile)
        for idx, mpfile_single in enumerate(mpfile_in.split()):

            # list(...) so the first key can be indexed on Python 3 as well
            mp_cat_id = list(mpfile_single.document.keys())[0]
            if ids is None or mp_cat_id == ids[0]:

                cid = mpfile_single.document[mp_cat_id].get('cid', None)
                if cid is not None:
                    cid_short = get_short_object_id(cid)
                    yield 'use #{} to update #{} ... '.format(idx, cid_short)

                # always run local "submission" to catch failure before interacting with DB
                yield 'process #{} ({}) ... '.format(idx, mp_cat_id)
                doc = cma.submit_contribution(
                    mpfile_single, contributor)  # does not use get_string
                cid = doc['_id']
                cid_short = get_short_object_id(cid)
                if ids is None or cid_short == ids[1]:

                    yield 'check ... '
                    # the round-trip consistency check is skipped for
                    # contributions above the 0.5 size threshold
                    obj_size = asizeof.asizeof(mpfile_single) / 1024. / 1024.
                    if obj_size > 0.5:
                        yield 'skip ({:.3f}MB) ... '.format(obj_size)
                    else:
                        try:
                            mpfile_single_cmp_str = mpfile_single.get_string()
                        except Exception:
                            yield 'get_string() FAILED!<br>'
                            continue
                        try:
                            mpfile_single_cmp = MPFile.from_string(
                                mpfile_single_cmp_str)
                        except Exception:
                            yield 'from_string() FAILED!<br>'
                            continue
                        if mpfile_single.document != mpfile_single_cmp.document:
                            yield 'check again ... '
                            found_inconsistency = False
                            # check structural data
                            structures_ok = True
                            for name, s1 in six.iteritems(
                                    mpfile_single.sdata[mp_cat_id]):
                                s2 = mpfile_single_cmp.sdata[mp_cat_id][name]
                                if s1 != s2:
                                    if len(s1) != len(s2):
                                        yield 'different number of sites: {} -> {}!<br>'.format(
                                            len(s1), len(s2))
                                        structures_ok = False
                                        break
                                    if s1.lattice != s2.lattice:
                                        yield 'lattices different!<br>'
                                        structures_ok = False
                                        break
                                    for site in s1:
                                        if site not in s2:
                                            found_inconsistency = True
                                            if not sm.fit(s1, s2):
                                                yield 'structures do not match!<br>'
                                                structures_ok = False
                                            break
                                    # stop at the first structure that fails;
                                    # this check must sit in the per-structure
                                    # loop, inside the site loop it could
                                    # never fire
                                    if not structures_ok:
                                        break
                            if not structures_ok:
                                continue
                            # check hierarchical and tabular data
                            # compare json strings to find first inconsistency
                            json_compare(mpfile_single.hdata,
                                         mpfile_single_cmp.hdata)
                            json_compare(mpfile_single.tdata,
                                         mpfile_single_cmp.tdata)
                            if not found_inconsistency:
                                # documents are not equal, but all components checked, skip contribution
                                # should not happen
                                yield 'inconsistency found but not identified!<br>'
                                continue

                    if target is not None:
                        yield 'submit ... '
                        cid = target.submit_contribution(
                            mpfile_single, fmt)  # uses get_string
                    mpfile_single.insert_id(mp_cat_id, cid)
                    cid_shorts.append(cid_short)

                    if target is not None:
                        if idx < 5:
                            yield 'build ... '
                            url = target.build_contribution(cid)
                            url = '/'.join([
                                target.preamble.rsplit('/', 1)[0], 'explorer',
                                url
                            ])
                            yield (
                                "OK. <a href='{}' class='btn btn-default btn-xs' "
                                + "role='button' target='_blank'>View</a></br>"
                            ).format(url)
                        else:
                            target.set_build_flag(cid, True)
                            yield 'OK (queued).</br>'
                    else:
                        # build locally when a specific contribution was
                        # requested, otherwise only for the first five
                        if ids is not None or idx < 5:
                            yield 'build ... '
                            mcb = MPContributionsBuilder(doc)
                            build_doc = mcb.build(contributor, cid)
                        else:
                            yield 'skip ... '
                            from pymatgen.util.provenance import Author
                            author = Author.parse_author(contributor)
                            build_doc = [mp_cat_id, author.name, cid_short, '']
                        yield build_doc

                        yield 'overview axes ... '
                        # `scope` tracks the current key path while walking
                        # the hierarchical data; `local_axes` collects the
                        # numeric key paths seen in this contribution.
                        scope, local_axes = [], set()
                        mpfile_for_axes = MPFile.from_contribution(doc)
                        for k, v in mpfile_for_axes.hdata[mp_cat_id].iterate():
                            if v is None:
                                scope = scope[:k[0]]
                                scope.append(k[1])
                            else:
                                try:
                                    if k[0] == len(scope):
                                        scope.append(k[1])
                                    else:
                                        scope[-1] = k[1]
                                    vf = float(v)  # trigger exception
                                    scope_str = '.'.join(scope)
                                    if idx == 0:
                                        axes.add(scope_str)
                                        ov_data[scope_str] = {
                                            cid_short: (vf, mp_cat_id)
                                        }
                                    else:
                                        local_axes.add(scope_str)
                                        ov_data[scope_str][cid_short] = (
                                            vf, mp_cat_id)
                                except Exception:
                                    # best effort: entries that are not
                                    # numeric (or otherwise unusable) are
                                    # simply left out
                                    pass
                        if idx > 0:
                            # keep only key paths shared with this contribution
                            axes.intersection_update(local_axes)
                        yield 'OK.</br>'

                else:
                    yield 'wrong CID.</br>'

            mpfile_out.concat(mpfile_single)
            time.sleep(.01)

        ncontribs = len(cid_shorts)
        if target is not None:
            yield '<strong>{} contributions successfully submitted.</strong>'.format(
                ncontribs)
        else:
            # iterate over a snapshot of the keys: on Python 3 `.keys()` is a
            # live view and popping while iterating it raises RuntimeError
            for k in list(ov_data):
                if k not in axes:
                    ov_data.pop(k)
            yield ov_data
            yield '<strong>{} contributions successfully processed.</strong>'.format(
                ncontribs)
    except Exception as ex:
        # `Exception` (not a bare except) so that GeneratorExit from an early
        # close() of this generator is not answered with further yields.
        yield 'FAILED.</br>'
        yield str(ex).replace('"', "'")
Example #6
0
def process_mpfile(path_or_mpfile, target=None, fmt='archieml'):
    """Process an MPFile contribution by contribution, yielding progress.

    This is a generator. It yields HTML-flavoured status strings while it
    works. When ``target`` is ``None`` it additionally yields the build
    result of every contribution and, at the end, the collected overview
    data: a dict mapping dotted key paths to
    ``{cid_short: (float_value, mp_cat_id)}``, restricted to the key paths
    kept in ``axes``.

    :param path_or_mpfile: passed to ``MPFile.from_file``; if it is a string
        it must name an existing file.
    :param target: optional object providing ``submit_contribution``,
        ``build_contribution`` and ``preamble``. When given, every
        contribution is also submitted to and built on it; when ``None``
        the contribution is built locally with ``MPContributionsBuilder``.
    :param fmt: name of the ``mpcontribs.io.<fmt>.mpfile`` module that
        supplies the ``MPFile`` class.

    Any ``Exception`` raised while processing is not propagated; instead
    ``'FAILED.</br>'`` and then the exception message (double quotes
    replaced by single quotes) are yielded and the generator ends.
    """
    try:
        if isinstance(path_or_mpfile, six.string_types) and \
           not os.path.isfile(path_or_mpfile):
            raise Exception('{} not found'.format(path_or_mpfile))
        mod = import_module('mpcontribs.io.{}.mpfile'.format(fmt))
        MPFile = getattr(mod, 'MPFile')
        full_name = pwd.getpwuid(os.getuid())[4]
        contributor = '{} <*****@*****.**>'.format(full_name) # fake
        cma = ContributionMongoAdapter()
        axes, ov_data = set(), dict()
        # split input MPFile into contributions: treat every mp_cat_id as separate DB insert
        mpfile, cid_shorts = MPFile.from_dict(), [] # output
        for idx, mpfile_single in enumerate(MPFile.from_file(path_or_mpfile).split()):
            # list(...) so the first key can be indexed on Python 3 as well
            mp_cat_id = list(mpfile_single.document.keys())[0]
            # TODO test update mode
            cid = mpfile_single.document[mp_cat_id].get('cid', None)
            if cid is not None:
                cid_short = get_short_object_id(cid)
                yield 'use contribution #{} to update ID #{} ... '.format(idx, cid_short)
            # always run local "submission" to catch failure before interacting with DB
            yield 'locally process contribution #{} ... '.format(idx)
            doc = cma.submit_contribution(mpfile_single, contributor) # does not use get_string
            cid = doc['_id']
            yield 'check consistency ... '
            # round-trip through the string representation and compare
            mpfile_single_cmp = MPFile.from_string(mpfile_single.get_string())
            if mpfile_single.document != mpfile_single_cmp.document:
                # compare json strings to find first inconsistency
                for a, b in zip(
                    json.dumps(mpfile_single.document, indent=4).split('\n'),
                    json.dumps(mpfile_single_cmp.document, indent=4).split('\n')
                ):
                    if a != b:
                        raise Exception('{} <====> {}'.format(a.strip(), b.strip()))
            if target is not None:
                yield 'submit to MP ... '
                cid = target.submit_contribution(mpfile_single, fmt) # uses get_string
            cid_short = get_short_object_id(cid)
            mpfile_single.insert_id(mp_cat_id, cid)
            cid_shorts.append(cid_short)
            yield 'build into {} ... '.format(mp_cat_id)
            if target is not None:
                url = target.build_contribution(cid)
                url = '/'.join([target.preamble.rsplit('/', 1)[0], 'explorer', url])
                yield ("OK. <a href='{}' class='btn btn-default btn-xs' " +
                       "role='button' target='_blank'>View</a></br>").format(url)
            else:
                mcb = MPContributionsBuilder(doc)
                build_doc = mcb.build(contributor, cid)
                yield build_doc
                yield 'determine overview axes ... '
                # `scope` tracks the current key path while walking the tree
                # data; `local_axes` collects the numeric key paths seen in
                # this contribution.
                scope, local_axes = [], set()
                for k, v in build_doc[3]['tree_data'].iterate():
                    if v is None:
                        scope = scope[:k[0]]
                        scope.append(k[1])
                    else:
                        try:
                            if k[0] == len(scope):
                                scope.append(k[1])
                            else:
                                scope[-1] = k[1]
                            vf = float(v) # trigger exception
                            scope_str = '.'.join(scope)
                            if idx == 0:
                                axes.add(scope_str)
                                ov_data[scope_str] = {cid_short: (vf, mp_cat_id)}
                            else:
                                local_axes.add(scope_str)
                                ov_data[scope_str][cid_short] = (vf, mp_cat_id)
                        except Exception:
                            # best effort: entries that are not numeric (or
                            # otherwise unusable) are simply left out
                            pass
                if idx > 0:
                    # keep only key paths shared with this contribution
                    axes.intersection_update(local_axes)
                yield 'OK.</br>'
            mpfile.concat(mpfile_single)
            time.sleep(.01)
        ncontribs = len(cid_shorts)
        #if target is not None and \
        #   isinstance(path_or_mpfile, six.string_types) and \
        #   os.path.isfile(path_or_mpfile):
        #    yield 'embed #{} in MPFile ...'.format('/'.join(cid_shorts))
        #    mpfile.write_file(path_or_mpfile, with_comments=True)
        if target is not None:
            yield '<strong>{} contributions successfully submitted.</strong>'.format(ncontribs)
        else:
            # iterate over a snapshot of the keys: popping while iterating
            # the dict itself raises RuntimeError
            for k in list(ov_data):
                if k not in axes:
                    ov_data.pop(k)
            yield ov_data
            yield '<strong>{} contributions successfully processed.</strong>'.format(ncontribs)
    except Exception as ex:
        # `Exception` (not a bare except) so that GeneratorExit from an early
        # close() of this generator is not answered with further yields.
        yield 'FAILED.</br>'
        yield str(ex).replace('"',"'")