Beispiel #1
0
    def fields_from_notes(self):
        """Parse ``self.notes`` as ArchieML and return a dict whose keys
        are slugified, with hyphens converted to underscores."""
        parsed = archieml.loads(self.notes)
        return {
            slugify(key).replace('-', '_'): value
            for key, value in parsed.items()
        }
    def process_archieml(self):
        """Download the Google Doc identified by ``self.code`` as plain
        text, parse it as ArchieML, and store the result in ``self.details``.

        Colons in URL schemes are masked before parsing because ArchieML
        treats ``key: value`` colons as delimiters; the masking is undone
        on the parsed string values afterwards.
        """
        logging.info("Processing text content")
        text_url = f"https://docs.google.com/document/d/{self.code}/export?format=txt"
        response = requests.get(text_url)
        # Fail fast on a bad download instead of parsing an error page.
        response.raise_for_status()
        text = response.text \
            .replace("http:", "httpCOLON") \
            .replace("https:", "httpsCOLON")

        self.details = archieml.loads(text)
        for key, value in self.details.items():
            # ArchieML can yield nested dicts/lists as values; only plain
            # strings carry the masked colons that need restoring.
            if isinstance(value, str):
                self.details[key] = value.replace("COLON", ":")
def parse_archieml(text):
    '''
    Abstract all archieml preprocessing and parsing to this function
    '''
    # Normalize line endings so the multiline regex below behaves.
    cleaned = text.replace('\r', '')
    # Obliterate ALL of google's [a][b][c] comment annotations!
    cleaned = re.sub(r'^\[[a-z]\].+$', '', cleaned, flags=re.M)
    cleaned = re.sub(r'\[[a-z]\]', '', cleaned)
    parsed = archieml.loads(cleaned)
    # Undo some of the auto-capitalization google docs inflicts,
    # dropping entries with empty values along the way.
    result = {}
    for key, value in parsed.items():
        if value:
            result[key.lower()] = value
    return result
Beispiel #4
0
 def from_string(data):
     '''Construct an MPFile from an ArchieML string.

     The parsed document is normalized in place: each root-level key is
     classified by normalize_root_level(); "general" entries are nested
     under mp_level01_titles[0] (inserted at the top if missing), other
     entries are nested under their normalized identifier ('table' is
     appended for bare list values). Free-form CSV sections (keys prefixed
     with mp_level01_titles[1]) are parsed with read_csv() and replaced by
     dict representations, and a default plot entry (first column as
     x-column) is added under mp_level01_titles[2] for each table.

     NOTE(review): relies on RecursiveDict helpers (insert_before,
     rec_update, iterate) defined elsewhere in this project — semantics
     assumed from usage, confirm against their definitions.
     '''
     # use archieml-python parse to import data
     mpfile = MPFile.from_dict(RecursiveDict(archieml.loads(data)))
     # post-process internal representation of file contents
     for key in mpfile.document.keys():
         is_general, root_key = normalize_root_level(key)
         if is_general:
             # make part of shared (meta-)data, i.e. nest under `general` at
             # the beginning of the MPFile
             if mp_level01_titles[0] not in mpfile.document:
                 mpfile.document.insert_before(
                     mpfile.document.keys()[0],
                     (mp_level01_titles[0], RecursiveDict())
                 )
             mpfile.document.rec_update(nest_dict(
                 mpfile.document.pop(key),
                 [ mp_level01_titles[0], root_key ]
             ))
         else:
             # normalize identifier key (pop & insert)
             # using rec_update since we're looping over all entries
             # also: support data in bare tables (marked-up only by
             #       root-level identifier) by nesting under 'data'
             value = mpfile.document.pop(key)
             keys = [ root_key ]
             if isinstance(value, list): keys.append('table')
             mpfile.document.rec_update(nest_dict(value, keys))
             # Note: CSV section is marked with 'data ' prefix during iterate()
             for k,v in mpfile.document[root_key].iterate():
                 if isinstance(k, six.string_types) and \
                    k.startswith(mp_level01_titles[1]):
                     # k = table name (incl. data prefix)
                     # v = csv string from ArchieML free-form arrays
                     table_name = k[len(mp_level01_titles[1]+'_'):]
                     pd_obj = read_csv(v)
                     # replace the raw CSV entry with the parsed table
                     mpfile.document[root_key].pop(table_name)
                     mpfile.document[root_key].rec_update(nest_dict(
                         pandas_to_dict(pd_obj), [k]
                     ))
                     # make default plot (add entry in 'plots') for each
                     # table, first column as x-column
                     plots_dict = nest_dict(
                         {'x': pd_obj.columns[0], 'table': table_name},
                         [mp_level01_titles[2], 'default_{}'.format(k)]
                     )
                     if mp_level01_titles[2] in mpfile.document[root_key]:
                         mpfile.document[root_key].rec_update(plots_dict)
                     else:
                       kv = (
                         mp_level01_titles[2],
                         plots_dict[mp_level01_titles[2]]
                       )
                       mpfile.document[root_key].insert_before(k, kv)
     return mpfile
Beispiel #5
0
 def from_string(data):
     '''Construct an MPFile from an ArchieML string.

     Root-level keys are classified by normalize_root_level(): "general"
     entries are nested under mp_level01_titles[0] (inserted at the top of
     the document if missing); other entries are nested under their
     normalized identifier, with 'table' appended for bare list values.
     CSV sections (keys prefixed with mp_level01_titles[1]) are parsed via
     read_csv() and replaced by dicts, and a default plot entry (first
     column as x-column) is added under mp_level01_titles[2].

     NOTE(review): depends on RecursiveDict helpers (insert_before,
     rec_update, iterate) defined elsewhere in this project.
     '''
     # use archieml-python parse to import data
     mpfile = MPFile.from_dict(RecursiveDict(archieml.loads(data)))
     # post-process internal representation of file contents
     for key in mpfile.document.keys():
         is_general, root_key = normalize_root_level(key)
         if is_general:
             # make part of shared (meta-)data, i.e. nest under `general` at
             # the beginning of the MPFile
             if mp_level01_titles[0] not in mpfile.document:
                 mpfile.document.insert_before(
                     mpfile.document.keys()[0],
                     (mp_level01_titles[0], RecursiveDict()))
             mpfile.document.rec_update(
                 nest_dict(mpfile.document.pop(key),
                           [mp_level01_titles[0], root_key]))
         else:
             # normalize identifier key (pop & insert)
             # using rec_update since we're looping over all entries
             # also: support data in bare tables (marked-up only by
             #       root-level identifier) by nesting under 'data'
             value = mpfile.document.pop(key)
             keys = [root_key]
             if isinstance(value, list): keys.append('table')
             mpfile.document.rec_update(nest_dict(value, keys))
             # Note: CSV section is marked with 'data ' prefix during iterate()
             for k, v in mpfile.document[root_key].iterate():
                 if isinstance(k, six.string_types) and \
                    k.startswith(mp_level01_titles[1]):
                     # k = table name (incl. data prefix)
                     # v = csv string from ArchieML free-form arrays
                     table_name = k[len(mp_level01_titles[1] + '_'):]
                     pd_obj = read_csv(v)
                     # replace the raw CSV entry with the parsed table
                     mpfile.document[root_key].pop(table_name)
                     mpfile.document[root_key].rec_update(
                         nest_dict(pandas_to_dict(pd_obj), [k]))
                     # make default plot (add entry in 'plots') for each
                     # table, first column as x-column
                     plots_dict = nest_dict(
                         {
                             'x': pd_obj.columns[0],
                             'table': table_name
                         }, [mp_level01_titles[2], 'default_{}'.format(k)])
                     if mp_level01_titles[2] in mpfile.document[root_key]:
                         mpfile.document[root_key].rec_update(plots_dict)
                     else:
                         kv = (mp_level01_titles[2],
                               plots_dict[mp_level01_titles[2]])
                         mpfile.document[root_key].insert_before(k, kv)
     return mpfile
Beispiel #6
0
 def from_string(data):
     '''Construct an MPFile from an ArchieML string.

     Parses the string into a RecursiveDict and normalizes it: "general"
     entries (per normalize_root_level()) are nested under
     mp_level01_titles[0], other entries under their normalized identifier
     ('table' appended for bare lists). CSV sections (keys prefixed with
     mp_level01_titles[1]) are parsed via read_csv() and replaced by
     dicts with default plot options attached. Entries under
     mp_level01_titles[3] are treated as CIF strings and converted to
     pymatgen structure dicts.

     NOTE(review): depends on RecursiveDict helpers (insert_before,
     rec_update, iterate, insert_default_plot_options) defined elsewhere
     in this project.
     '''
     # use archieml-python parse to import data
     rdct = RecursiveDict(archieml.loads(data))
     rdct.rec_update()
     # post-process internal representation of file contents
     for key in rdct.keys():
         is_general, root_key = normalize_root_level(key)
         if is_general:
             # make part of shared (meta-)data, i.e. nest under `general` at
             # the beginning of the MPFile
             if mp_level01_titles[0] not in rdct:
                 rdct.insert_before(rdct.keys()[0],
                                    (mp_level01_titles[0], RecursiveDict()))
             rdct.rec_update(
                 nest_dict(rdct.pop(key), [mp_level01_titles[0], root_key]))
         else:
             # normalize identifier key (pop & insert)
             # using rec_update since we're looping over all entries
             # also: support data in bare tables (marked-up only by
             #       root-level identifier) by nesting under 'data'
             value = rdct.pop(key)
             keys = [root_key]
             if isinstance(value, list): keys.append('table')
             rdct.rec_update(nest_dict(value, keys))
             # Note: CSV section is marked with 'data ' prefix during iterate()
             for k, v in rdct[root_key].iterate():
                 if isinstance(k, six.string_types) and \
                    k.startswith(mp_level01_titles[1]):
                     # k = table name (incl. data prefix)
                     # v = csv string from ArchieML free-form arrays
                     table_name = k[len(mp_level01_titles[1] + '_'):]
                     pd_obj = read_csv(v)
                     # replace the raw CSV entry with the parsed table
                     rdct[root_key].pop(table_name)
                     rdct[root_key].rec_update(
                         nest_dict(pd_obj.to_dict(), [k]))
                     rdct[root_key].insert_default_plot_options(pd_obj, k)
             # convert CIF strings into pymatgen structures
             if mp_level01_titles[3] in rdct[root_key]:
                 from pymatgen.io.cif import CifParser
                 for name in rdct[root_key][mp_level01_titles[3]].keys():
                     cif = rdct[root_key][mp_level01_titles[3]].pop(name)
                     parser = CifParser.from_string(cif)
                     structure = parser.get_structures(primitive=False)[0]
                     rdct[root_key][mp_level01_titles[3]].rec_update(
                         nest_dict(structure.as_dict(), [name]))
     return MPFile.from_dict(rdct)
Beispiel #7
0
    def _main(self):
        '''Downloads Google Doc contents and convert to JSON using ArchieML.'''

        print '\nDownloading docs...'

        total_json = {}
        stories_list = []

        service = self._build_service()

        for doc_id in self.doc_ids:
            contents = self._get_file_contents(service, doc_id)

            contents_json = archieml.loads(contents)

            contents_json = self._parse_plain_text_for_html(contents_json)

            stories_list.append(contents_json)

        total_json['stories'] = stories_list

        self._write_json(total_json)
Beispiel #8
0
    def _main(self):
        """Downloads Google Doc contents and convert to JSON using ArchieML."""

        print "\nDownloading docs..."

        total_json = {}
        stories_list = []

        service = self._build_service()

        for doc_id in self.doc_ids:
            contents = self._get_file_contents(service, doc_id)

            contents_json = archieml.loads(contents)

            contents_json = self._parse_plain_text_for_html(contents_json)

            stories_list.append(contents_json)

        total_json["stories"] = stories_list

        self._write_json(total_json)
def make_context(asset_depth=0):
    """
    Create a base-context for rendering views.
    Includes app_config and JS/CSS includers.

    `asset_depth` indicates how far into the url hierarchy
    the assets are hosted. If 0, then they are at the root.
    If 1 then at /foo/, etc.
    """
    context = flatten_app_config()

    try:
        with open(app_config.COPY_PATH) as f:
            # REMOVE the BOM. Google Docs downloaded as "text/plain" apparently save as UTF-8 with a BOM.
            # archieml-python will fail to parse first line of the document correctly if there's a BOM.
            data = f.read().decode('utf-8-sig').encode('utf-8')
            context['COPY'] = archieml.loads(data)
    except Exception:
        # Best-effort: a missing or unparsable copy file must not break
        # rendering. Narrowed from a bare `except:` so KeyboardInterrupt
        # and SystemExit still propagate.
        pass

    context['JS'] = JavascriptIncluder(asset_depth=asset_depth)
    context['CSS'] = CSSIncluder(asset_depth=asset_depth)

    return context
Beispiel #10
0
    def from_string(data):
        """Construct an MPFile from an ArchieML string.

        Root-level keys are classified by normalize_root_level():
        "general" entries are nested under mp_level01_titles[0] (created
        at the front of the dict if missing); all entries are re-nested
        under their normalized identifier, with "table" appended for bare
        list values. Within each section, free-form-array rows are joined
        into CSV and parsed with read_csv(); string values containing a
        space are parsed as propnet quantities when Quantity is available;
        entries under mp_level01_titles[3] are treated as CIF strings and
        converted to pymatgen structure dicts.

        NOTE(review): depends on RecursiveDict helpers (rec_update,
        iterate, move_to_end, insert_default_plot_options) and the
        module-level `replacements` mapping defined elsewhere in this
        project; iterate() is assumed to yield ((level, key), value)
        pairs — confirm against its definition.
        """
        # use archieml-python parse to import data
        rdct = RecursiveDict(loads(data))
        rdct.rec_update()

        # post-process internal representation of file contents
        for key in list(rdct.keys()):
            is_general, root_key = normalize_root_level(key)

            if is_general:
                # make part of shared (meta-)data, i.e. nest under `general` at
                # the beginning of the MPFile
                if mp_level01_titles[0] not in rdct:
                    rdct[mp_level01_titles[0]] = RecursiveDict()
                    rdct.move_to_end(mp_level01_titles[0], last=False)

            # normalize identifier key (pop & insert)
            # using rec_update since we're looping over all entries
            # also: support data in bare tables (marked-up only by
            #       root-level identifier) by nesting under 'data'
            value = rdct.pop(key)
            keys = [mp_level01_titles[0]] if is_general else []
            keys.append(root_key)
            if isinstance(value, list):
                keys.append("table")
            rdct.rec_update(nest_dict(value, keys))

            # reference to section to iterate or parse as CIF
            section = (rdct[mp_level01_titles[0]][root_key]
                       if is_general else rdct[root_key])

            # iterate to find CSV sections to parse
            # also parse propnet quantities
            if isinstance(section, dict):
                # `scope` tracks the current nesting path while walking the
                # flattened (level, key) stream emitted by iterate().
                scope = []
                for k, v in section.iterate():
                    level, key = k
                    # sanitize key characters via the module-level mapping
                    key = "".join([replacements.get(c, c) for c in key])
                    level_reduction = bool(level < len(scope))
                    if level_reduction:
                        # stepped back out of a nested section
                        del scope[level:]
                    if v is None:
                        # a key with no value opens a new sub-scope
                        scope.append(key)
                    elif isinstance(v, list) and isinstance(v[0], dict):
                        # free-form array rows -> newline-joined CSV text
                        table = ""
                        for row_dct in v:
                            table = "\n".join([table, row_dct["value"]])
                        pd_obj = read_csv(table)
                        d = nest_dict(pd_obj.to_dict(), scope + [key])
                        section.rec_update(d, overwrite=True)
                        if not is_general and level == 0:
                            section.insert_default_plot_options(pd_obj, key)
                    elif (Quantity is not None
                          and isinstance(v, six.string_types) and " " in v):
                        # "value unit" strings become propnet quantities
                        quantity = Quantity.from_key_value(key, v)
                        d = nest_dict(quantity.as_dict(), scope +
                                      [key])  # TODO quantity.symbol.name
                        section.rec_update(d, overwrite=True)

            # convert CIF strings into pymatgen structures
            if mp_level01_titles[3] in section:
                from pymatgen.io.cif import CifParser

                for name in section[mp_level01_titles[3]].keys():
                    cif = section[mp_level01_titles[3]].pop(name)
                    parser = CifParser.from_string(cif)
                    structure = parser.get_structures(primitive=False)[0]
                    section[mp_level01_titles[3]].rec_update(
                        nest_dict(structure.as_dict(), [name]))

        return MPFile.from_dict(rdct)
Beispiel #11
0
            self._copy[section] = Section(section, doc[section])



#c = Copy('data/copy.txt')

#print dir(c)

# for section in c:
#     print '---------------------'
#     print section


# Open the file
with open('data/copy.aml') as f:
    # REMOVE the BOM. Google Docs downloaded as "text/plain" apparently save as UTF-8 with a BOM.
    # Failing to remove the BOM will cause the first line of the document to be parsed incorrectly
    data = f.read().decode("utf-8-sig").encode("utf-8")
    a = archieml.loads(data)

print a
for section in a:
    print '---------------------'
    print section
    if type(a[section]) in [OrderedDict,list,dict]:
        for item in a[section]:
            print item