Example #1
    def resolve_conflicts(fpath, strat, force=False, verbose=True):
        """
        Parses merge conflits and takes either version
        """
        import utool as ut
        import re
        top_pat = re.escape('<' * 7)
        mid_pat = re.escape('=' * 7)
        bot_pat = re.escape('>' * 7)
        flags = re.MULTILINE | re.DOTALL
        # Pattern to remove the top part
        theirs_pat1 = re.compile('^%s.*?%s.*?$\n' % (top_pat, mid_pat), flags=flags)
        theirs_pat2 = re.compile('^%s.*?$\n' % (bot_pat), flags=flags)
        # Pattern to remove the bottom part
        ours_pat1   = re.compile('^%s.*?%s.*?$\n' % (mid_pat, bot_pat), flags=flags)
        ours_pat2   = re.compile('^%s.*?$\n' % (top_pat), flags=flags)
        strat_pats = {
            'theirs': [theirs_pat1, theirs_pat2],
            'ours': [ours_pat1, ours_pat2],
        }

        text_in = ut.readfrom(fpath)
        text_out = text_in
        for pat in strat_pats[strat]:
            text_out = pat.sub('', text_out)
        if verbose:
            ut.print_difftext(ut.difftext(text_in, text_out, num_context_lines=3))

        if force:
            ut.writeto(fpath, text_out)
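
A minimal usage sketch (hypothetical file path; assumes the file contains standard git conflict markers). The first call only previews the diff; force=True writes the chosen resolution back:

    resolve_conflicts('conflicted.py', strat='theirs')               # preview the diff only
    resolve_conflicts('conflicted.py', strat='theirs', force=True)   # write the 'theirs' side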
Example #2
def get_bibtex_dict(bib_fpath):
    r"""
    Args:
        bib_fpath (str):

    Returns:
        dict: bibtex_dict

    CommandLine:
        python -m utool.util_latex --test-get_bibtex_dict
        pip install bibtexparser

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_latex import *  # NOQA
        >>> import utool as ut
        >>> bib_fpath = ut.truepath('~/latex/crall-candidacy-2015/My_Library_clean.bib')
        >>> bibtex_dict = get_bibtex_dict(bib_fpath)
        >>> result = ('bibtex_dict = %s' % (str(bibtex_dict),))
        >>> print(result)
    """
    import bibtexparser
    import utool as ut
    bibtex_str   = ut.readfrom(bib_fpath, verbose=False)
    bib_database = bibtexparser.loads(bibtex_str)
    bibtex_dict  = bib_database.get_entry_dict()
    return bibtex_dict
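
A minimal usage sketch (hypothetical .bib path): the returned dict maps citation keys to bibtexparser entry dicts, so fields can be read directly:

    bibtex_dict = get_bibtex_dict('library.bib')  # hypothetical path
    for citekey, entry in bibtex_dict.items():
        print(citekey, entry.get('title', ''))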
Example #3
    def outline():
        """
        ./texfix.py --fpaths chapter4-application.tex --outline --asmarkdown --numlines=999 -w --ignoreinputstartswith=def,Crall,header,colordef,figdef
        """
        fpaths = testdata_fpaths()
        print('fpaths = %r' % (fpaths, ))

        for fpath in fpaths:
            text = ut.readfrom(fpath)
            root = latex_parser.LatexDocPart.parse_text(text, debug=None)

            # HACK
            new_text = '\n'.join(root.reformat_blocks(debug=None))
            # remove trailing spaces
            new_text = re.sub(' *$', '', new_text, flags=re.MULTILINE)
            # remove double newlines
            new_text = re.sub('(\n *)+\n+',
                              '\n\n',
                              new_text,
                              flags=re.MULTILINE)

            document = root.find_descendant_type('document')
            #document = root.find_descendant_type('section', pat='Identification')
            print('document = %r' % (document, ))
            if document is not None:
                root = document

            sectionpat = ut.get_argval('--section', default=None)
            if sectionpat is not None:
                root = root.find_descendant_type('section', pat=sectionpat)
                print('root = %r' % (root, ))
                if root is None:
                    # import utool
                    # utool.embed()
                    raise Exception('section %r does not exist' % (sectionpat))
            #print(root.get_debug_tree_text())

            #ut.colorprint(root.summary_str(outline=True), 'yellow')
            print('---outline---')
            outline = True
            # outline = False
            outline_text = root.summary_str(outline=outline, highlight=False)
            summary = root.summary_str(outline=outline, highlight=True)
            if not ut.get_argflag('-w'):
                print(summary)
            print('---/outline---')
            if root._config['asmarkdown']:
                codetype = 'markdown'
                newext = '.md'
            else:
                codetype = 'latex'
                newext = None

            ut.dump_autogen_code(ut.augpath(fpath,
                                            augpref='outline_',
                                            newext=newext),
                                 outline_text,
                                 codetype=codetype,
                                 fullprint=False)
Example #4
def inject_python_code2(fpath, patch_code, tag):
    """ Does autogeneration stuff """
    import utool as ut
    text = ut.readfrom(fpath)
    start_tag = '# <%s>' % tag
    end_tag = '# </%s>' % tag
    new_text = ut.replace_between_tags(text, patch_code, start_tag, end_tag)
    ut.writeto(fpath, new_text)
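
A minimal usage sketch (hypothetical file and tag name): the target file must already contain the marker lines, since only the text between '# <autogen>' and '# </autogen>' is replaced:

    patch_code = 'x = 1\ny = 2'  # code to splice between the markers
    inject_python_code2('module.py', patch_code, tag='autogen')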
Example #5
 def caption_sentences(fpath):
     text = ut.readfrom(fpath)
     tokenstream = Tokenizer(text).tokenize()
     self = DocParser(tokenstream, fpath)
     tree = self.parse()
     for node in tree.walk():
         if isinstance(node, FigureNode):
             for x in node.walk():
                 if isinstance(x, CaptionNode):
                     for sent in ut.split_sentences2(x.resolve()):
                         yield sent
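
A minimal usage sketch (hypothetical .tex path): caption_sentences is a generator, so sentences can be consumed lazily:

    for sent in caption_sentences('chapter1.tex'):
        print(sent)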
Example #6
def research(r,
             start_line_str=None,
             rate='3',
             sentence_mode=True,
             open_file=False):
    fname = join(split(__file__)[0], 'to_speak.txt')
    if start_line_str == "prep":
        os.system(fname)
        return
    if open_file is True:
        os.system(fname)
    import utool as ut

    input_str = preprocess_research(ut.readfrom(fname))
    if sentence_mode:
        input_str = input_str.replace('\n', ' ').replace('. ', '.\n')
        input_str = re.sub('  *', ' ', input_str)

    line_count = 0
    page = 0
    page_re = re.compile(' *--- Page [0-9]* *--- *')
    if start_line_str is None:
        start_page = 0
        start_line = 0
        try:
            start_line = int(input('Did you forget the start line?'))
        except Exception:
            pass
    elif start_line_str.find('page') != -1:
        start_page = int(start_line_str.replace('page', ''))
        start_line = 0
    else:
        start_page = 0
        start_line = int(start_line_str)

    print('Starting on line: %d' % (start_line))
    print('Starting on page: %d' % (start_page))
    for line in input_str.split('\n'):
        print('____')
        # Check for page marker
        if page_re.findall(line) != []:
            page = int(re.sub(' *--- Page ', '', line).replace('---', ''))
        # Print out what is being read
        line_count += 1
        print('%d, %d > %s' % (page, line_count, line))
        if start_line > line_count or start_page > page:
            continue
        # Preprocess the line
        line = process_research_line(line)
        if line == '':
            continue
        print('--')
        robos.speak(r, line, rate)
Example #7
    def reformat():
        """
        ./texfix.py --reformat --fpaths NewParts.tex
        >>> from texfix import *  # NOQA
        """
        fpaths = testdata_fpaths()

        for fpath in fpaths:
            text = ut.readfrom(fpath)
            root = latex_parser.LatexDocPart.parse_text(text, debug=None)

            if ut.get_argflag('--fixcref'):
                root.find(' \\\\cref')
                continue

            #print(root.children)
            #root.children = root.children[0:5]
            #print('Parsed Str Short')
            new_text = '\n'.join(root.reformat_blocks(debug=None))
            # remove trailing spaces
            new_text = re.sub(' *$', '', new_text, flags=re.MULTILINE)
            # remove double newlines
            new_text = re.sub('(\n *)+\n+',
                              '\n\n',
                              new_text,
                              flags=re.MULTILINE)

            if ut.get_argflag('--summary'):
                print('---summary---')
                root.print_summary()
                print('---/summary---')
                # ut.colorprint(root.summary_str(), 'blue')

            numchars1 = len(text.replace(' ', '').replace('\n', ''))
            numchars2 = len(new_text.replace(' ', '').replace('\n', ''))

            print('numchars1 = %r' % (numchars1, ))
            print('numchars2 = %r' % (numchars2, ))
            #assert numchars1 == numchars2, '%r == %r' % (numchars1, numchars2)

            print('old newlines = %r' % (text.count('\n'), ))
            print('new newlines = %r' % (new_text.count('\n'), ))

            #import unicodedata
            #new_text = unicodedata.normalize('NFKD', new_text).encode('ascii','ignore')
            #print('new_text = %r' % (new_text,))

            ut.dump_autogen_code(fpath,
                                 new_text,
                                 codetype='latex',
                                 fullprint=False)
Example #8
 def test_body(count, logmode, backspace):
     ut.colorprint('\n---- count = %r -----' % (count,), 'yellow')
     ut.colorprint('backspace = %r' % (backspace,), 'yellow')
     ut.colorprint('logmode = %r' % (logmode,), 'yellow')
     if logmode:
         ut.delete('test.log')
         ut.start_logging('test.log')
     print('Start main loop')
     import time
     for count in ut.ProgressIter(range(20), freq=3, backspace=backspace):
         time.sleep(.01)
     print('Done with main loop work')
     print('Exiting main body')
     if logmode:
         ut.stop_logging()
         #print('-----DONE LOGGING----')
         testlog_text = ut.readfrom('test.log')
         print(ut.indent(testlog_text.replace('\r', '\n'), '        '))
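
A minimal sketch of driving the helper above (assumes utool is already imported as ut at module scope): with logmode=True the progress output is written to test.log and echoed back indented:

    test_body(count=0, logmode=True, backspace=False)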
Example #9
    def _read_raw_entries(bibman):
        parser = bparser.BibTexParser()
        ut.delete_keys(parser.alt_dict, ['url', 'urls'])
        parser.ignore_nonstandard_types = False
        text = ut.readfrom(bibman.fpath)

        # ensure good format
        flag = 0
        for x in re.finditer('^.*}\n[^}\n]', text, flags=re.MULTILINE):
            lineno = x.string[:x.start()].count('\n') + 1
            print('DID YOU FORGET A COMMA ON lineno = {!r}'.format(lineno))
            print(x.group())
            flag += 1
        assert not flag, 'fix formatting'

        database = parser.parse(text)
        entries = database.entries
        raw_entries = database.get_entry_dict()
        raw_entries = ut.order_dict_by(raw_entries, [e['ID'] for e in entries])
        bibman.raw_text = text
        bibman.raw_entries = raw_entries
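
The format check above flags any line ending in '}' that is followed by a line not starting with '}', which in a .bib file usually indicates a missing comma between fields. A standalone sketch of the same pattern on synthetic input:

    import re
    text = '@article{key,\n  title = {T}\n  year = {2020}\n}\n'  # comma missing after title
    for x in re.finditer('^.*}\n[^}\n]', text, flags=re.MULTILINE):
        lineno = x.string[:x.start()].count('\n') + 1
        print('possible missing comma on line', lineno)  # -> line 2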
Example #10
def dummy_import_vim(fpath=None):
    if fpath is not None:
        fpath = abspath(expanduser(fpath))

    try:
        import vim
        dohack = False
    except ImportError:
        dohack = True
        vim = None

    if vim is not None:
        if getattr(vim, '__ishack__', False):
            if fpath != vim.current.buffer.name:
                dohack = True

    if dohack:
        import sys
        import utool as ut
        vim = ut.DynStruct()
        vim.__ishack__  = True
        vim.current = ut.DynStruct()
        vim.current.window = ut.DynStruct()
        vim.current.window.cursor = (0, 0)
        if fpath is None:
            lines = [
                'line1',
                'line2',
                'line3',
            ]
        else:
            lines = ut.readfrom(fpath).splitlines()
        vim.current.buffer = DummyVimBuffer(lines)
        vim.current.buffer.name = fpath
        # VERY HACKY
        sys.modules['vim'] = vim
    return vim
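
A minimal usage sketch (assumes the real vim module is not importable, so the hack path runs): with no fpath the dummy buffer holds three placeholder lines:

    vim = dummy_import_vim()
    print(vim.current.window.cursor)  # (0, 0)
    print(vim.current.buffer.name)    # None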
Example #11
def main():
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'f:u:n:h')
    except getopt.GetoptError:
        usage()
        sys.exit(1)

    filename = None
    url = 'www.whaleshark.org/listImages.jsp'
    number = 0

    # Handle command-line arguments
    for opt, arg in opts:
        if opt == '-h':
            usage()
            sys.exit()
        elif opt == '-f':
            filename = arg
        elif opt == '-u':
            url = arg
        elif opt == '-n':
            try:
                number = int(arg)
            except ValueError:
                usage()
                sys.exit()

    # Open the XML file and extract its contents as a DOM object
    if filename:
        XMLdata = ut.readfrom(filename)
    else:
        XMLdata = ut.url_read(url)
        #with open('XMLData.xml', 'w') as file_:
        #    file_.write(XMLdata)
    print('Downloading')
    download_sharks(XMLdata, number)
Example #12
def main(bib_fpath=None):
    r"""
    Entry point to the fixbib script

    CommandLine:
        fixbib
        python -m fixtex bib
        python -m fixtex bib --dryrun
        python -m fixtex bib --dryrun --debug
    """

    if bib_fpath is None:
        bib_fpath = 'My Library.bib'

    # DEBUG = ub.argflag('--debug')
    # Read in text and ensure ascii format
    dirty_text = ut.readfrom(bib_fpath)

    from fixtex.fix_tex import find_used_citations, testdata_fpaths

    if exists('custom_extra.bib'):
        extra_parser = bparser.BibTexParser(ignore_nonstandard_types=False)
        parser = bparser.BibTexParser()
        ut.delete_keys(parser.alt_dict, ['url', 'urls'])
        print('Parsing extra bibtex file')
        extra_text = ut.readfrom('custom_extra.bib')
        extra_database = extra_parser.parse(extra_text, partial=False)
        print('Finished parsing extra')
        extra_dict = extra_database.get_entry_dict()
    else:
        extra_dict = None

    #udata = dirty_text.decode("utf-8")
    #dirty_text = udata.encode("ascii", "ignore")
    #dirty_text = udata

    # parser = bparser.BibTexParser()
    # bib_database = parser.parse(dirty_text)
    # d = bib_database.get_entry_dict()

    print('BIBTEXPARSER LOAD')
    parser = bparser.BibTexParser(ignore_nonstandard_types=False,
                                  common_strings=True)
    ut.delete_keys(parser.alt_dict, ['url', 'urls'])
    print('Parsing bibtex file')
    bib_database = parser.parse(dirty_text, partial=False)
    print('Finished parsing')

    bibtex_dict = bib_database.get_entry_dict()
    old_keys = list(bibtex_dict.keys())
    new_keys = []
    for key in ub.ProgIter(old_keys, label='fixing keys'):
        new_key = key
        new_key = new_key.replace(':', '')
        new_key = new_key.replace('-', '_')
        new_key = re.sub('__*', '_', new_key)
        new_keys.append(new_key)

    # assert len(ut.find_duplicate_items(new_keys)) == 0, 'new keys created conflict'
    assert len(ub.find_duplicates(new_keys)) == 0, 'new keys created conflict'

    for key, new_key in zip(old_keys, new_keys):
        if key != new_key:
            entry = bibtex_dict[key]
            entry['ID'] = new_key
            bibtex_dict[new_key] = entry
            del bibtex_dict[key]

    # The bibtex is now clean. Print it to stdout
    #print(clean_text)
    verbose = None
    if verbose is None:
        verbose = 1

    # Find citations from the tex documents
    key_list = None
    if key_list is None:
        cacher = ub.Cacher('texcite1', enabled=0)
        data = cacher.tryload()
        if data is None:
            fpaths = testdata_fpaths()
            key_list, inverse = find_used_citations(fpaths,
                                                    return_inverse=True)
            # ignore = ['JP', '?', 'hendrick']
            # for item in ignore:
            #     try:
            #         key_list.remove(item)
            #     except ValueError:
            #         pass
            if verbose:
                print('Found %d citations used in the document' %
                      (len(key_list), ))
            data = key_list, inverse
            cacher.save(data)
        key_list, inverse = data

    # else:
    #     key_list = None

    unknown_pubkeys = []
    debug_author = ub.argval('--debug-author', default=None)
    # ./fix_bib.py --debug_author=Kappes

    if verbose:
        print('Fixing %d/%d bibtex entries' %
              (len(key_list), len(bibtex_dict)))

    # debug = True
    debug = False
    if debug_author is not None:
        debug = False

    known_keys = list(bibtex_dict.keys())
    missing_keys = set(key_list) - set(known_keys)
    if extra_dict is not None:
        missing_keys.difference_update(set(extra_dict.keys()))

    if missing_keys:
        print('The library is missing keys found in tex files %s' %
              (ub.repr2(missing_keys), ))

    # Search for possible typos:
    candidate_typos = {}
    sedlines = []
    for key in missing_keys:
        candidates = ut.closet_words(key, known_keys, num=3, subset=True)
        if len(candidates) > 1:
            top = candidates[0]
            if ut.edit_distance(key, top) == 1:
                # "sed -i -e 's/{}/{}/g' *.tex".format(key, top)
                import os
                replpaths = ' '.join(
                    [relpath(p, os.getcwd()) for p in inverse[key]])
                sedlines.append("sed -i -e 's/{}/{}/g' {}".format(
                    key, top, replpaths))
        candidate_typos[key] = candidates
        print('Cannot find key = %r' % (key, ))
        print('Did you mean? %r' % (candidates, ))

    print('Quick fixes')
    print('\n'.join(sedlines))

    # group by file
    just = max([0] + list(map(len, missing_keys)))
    missing_fpaths = [inverse[key] for key in missing_keys]
    for fpath in sorted(set(ub.flatten(missing_fpaths))):
        # ut.fix_embed_globals()
        subkeys = [k for k in missing_keys if fpath in inverse[k]]
        print('')
        ut.cprint('--- Missing Keys ---', 'blue')
        ut.cprint('fpath = %r' % (fpath, ), 'blue')
        ut.cprint('{} | {}'.format('Missing'.ljust(just), 'Did you mean?'),
                  'blue')
        for key in subkeys:
            print('{} | {}'.format(ut.highlight_text(key.ljust(just), 'red'),
                                   ' '.join(candidate_typos[key])))

    # for key in list(bibtex_dict.keys()):

    if extra_dict is not None:
        # Extra database takes precedence over regular
        key_list = list(ut.unique(key_list + list(extra_dict.keys())))
        for k, v in extra_dict.items():
            bibtex_dict[k] = v

    full = ub.argflag('--full')

    for key in key_list:
        try:
            entry = bibtex_dict[key]
        except KeyError:
            continue
        self = BibTexCleaner(key, entry, full=full)

        if debug_author is not None:
            debug = debug_author in entry.get('author', '')

        if debug:
            ut.cprint(' --- ENTRY ---', 'yellow')
            print(ub.repr2(entry, nl=1))

        entry = self.fix()
        # self.clip_abstract()
        # self.shorten_keys()
        # self.fix_authors()
        # self.fix_year()
        # old_pubval = self.fix_pubkey()
        # if old_pubval:
        #     unknown_pubkeys.append(old_pubval)
        # self.fix_arxiv()
        # self.fix_general()
        # self.fix_paper_types()

        if debug:
            print(ub.repr2(entry, nl=1))
            ut.cprint(' --- END ENTRY ---', 'yellow')
        bibtex_dict[key] = entry

    unwanted_keys = set(bibtex_dict.keys()) - set(key_list)
    if verbose:
        print('Removing unwanted %d entries' % (len(unwanted_keys)))
    ut.delete_dict_keys(bibtex_dict, unwanted_keys)

    if 0:
        d1 = bibtex_dict.copy()
        full = True
        for key, entry in d1.items():
            self = BibTexCleaner(key, entry, full=full)
            pub = self.publication()
            if pub is None:
                print(self.entry['ENTRYTYPE'])

            old = self.fix_pubkey()
            x1 = self._pubval()
            x2 = self.standard_pubval(full=full)
            # if x2 is not None and len(x2) > 5:
            #     print(ub.repr2(self.entry))

            if x1 != x2:
                print('x2 = %r' % (x2, ))
                print('x1 = %r' % (x1, ))
                print(ub.repr2(self.entry))

            # if 'CVPR' in self.entry.get('booktitle', ''):
            #     if 'CVPR' != self.entry.get('booktitle', ''):
            #         break
            if old:
                print('old = %r' % (old, ))
            d1[key] = self.entry

    if full:
        d1 = bibtex_dict.copy()

        import numpy as np
        import pandas as pd
        df = pd.DataFrame.from_dict(d1, orient='index')

        paged_items = df[~pd.isnull(df['pub_accro'])]
        has_pages = ~pd.isnull(paged_items['pages'])
        print('have pages {} / {}'.format(has_pages.sum(), len(has_pages)))
        print(ub.repr2(paged_items[~has_pages]['title'].values.tolist()))

        entrytypes = dict(list(df.groupby('pub_type')))
        if False:
            # entrytypes['misc']
            g = entrytypes['online']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            entrytypes['book']
            entrytypes['thesis']
            g = entrytypes['article']
            g = entrytypes['incollection']
            g = entrytypes['conference']

        def lookup_pub(e):
            if e == 'article':
                return 'journal', 'journal'
            elif e == 'incollection':
                return 'booksection', 'booktitle'
            elif e == 'conference':
                return 'conference', 'booktitle'
            return None, None

        for e, g in entrytypes.items():
            print('e = %r' % (e, ))
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            if 'pub_full' in g.columns:
                place_title = g['pub_full'].tolist()
                print(ub.repr2(ub.dict_hist(place_title)))
            else:
                print('Unknown publications')

        if 'report' in entrytypes:
            g = entrytypes['report']
            missing = g[pd.isnull(g['title'])]
            if len(missing):
                print('Missing Title')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'journal' in entrytypes:
            g = entrytypes['journal']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            missing = g[pd.isnull(g['journal'])]
            if len(missing):
                print('Missing Journal')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'conference' in entrytypes:
            g = entrytypes['conference']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            missing = g[pd.isnull(g['booktitle'])]
            if len(missing):
                print('Missing Booktitle')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'incollection' in entrytypes:
            g = entrytypes['incollection']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            missing = g[pd.isnull(g['booktitle'])]
            if len(missing):
                print('Missing Booktitle')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'thesis' in entrytypes:
            g = entrytypes['thesis']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            missing = g[pd.isnull(g['institution'])]
            if len(missing):
                print('Missing Institution')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        # import utool
        # utool.embed()

    # Overwrite BibDatabase structure
    bib_database._entries_dict = bibtex_dict
    bib_database.entries = list(bibtex_dict.values())

    #conftitle_to_types_set_hist = {key: set(val) for key, val in conftitle_to_types_hist.items()}
    #print(ub.repr2(conftitle_to_types_set_hist))

    print('Unknown conference keys:')
    print(ub.repr2(sorted(unknown_pubkeys)))
    print('len(unknown_pubkeys) = %r' % (len(unknown_pubkeys), ))

    writer = BibTexWriter()
    writer.contents = ['comments', 'entries']
    writer.indent = '  '
    writer.order_entries_by = ('type', 'author', 'year')

    new_bibtex_str = bibtexparser.dumps(bib_database, writer)

    # Need to check
    #jegou_aggregating_2012

    # Fix the Journal Abbreviations
    # References:
    # https://www.ieee.org/documents/trans_journal_names.pdf

    # Write out clean bibfile in ascii format
    clean_bib_fpath = ub.augpath(bib_fpath.replace(' ', '_'), suffix='_clean')

    if not ub.argflag('--dryrun'):
        ut.writeto(clean_bib_fpath, new_bibtex_str)
Example #13
def install_wildbook(verbose=ut.NOT_QUIET):
    """
    Script to setup wildbook on a unix based system
    (hopefully eventually this will generalize to win32)

    CommandLine:
        # Reset
        python -m ibeis --tf reset_local_wildbook
        # Setup
        python -m ibeis --tf install_wildbook
        # Startup
        python -m ibeis --tf startup_wildbook_server --show --exec-mode

        # Reset
        python -m ibeis.control.manual_wildbook_funcs --test-reset_local_wildbook
        # Setup
        python -m ibeis.control.manual_wildbook_funcs --test-install_wildbook
        # Startup
        python -m ibeis.control.manual_wildbook_funcs --test-startup_wildbook_server --show --exec-mode


    Example:
        >>> # SCRIPT
        >>> from ibeis.control.manual_wildbook_funcs import *  # NOQA
        >>> verbose = True
        >>> result = install_wildbook()
        >>> print(result)
    """
    # TODO: allow custom specified tomcat directory
    from os.path import basename, splitext, join
    import time
    import re
    import subprocess
    try:
        output = subprocess.check_output(['java', '-version'],
                                         stderr=subprocess.STDOUT)
        _java_version = output.decode('utf-8').split('\n')[0]
        _java_version = _java_version.replace('java version ', '')
        java_version = _java_version.replace('"', '')
        print('java_version = %r' % (java_version,))
        if not java_version.startswith('1.7'):
            print('Warning wildbook is only supported for java 1.7')
    except OSError:
        output = None
    if output is None:
        raise ImportError(
            'Cannot find java on this machine. '
            'Please install java: http://www.java.com/en/download/')

    tomcat_dpath = find_or_download_tomcat()
    assert tomcat_dpath is not None, 'Could not find tomcat'
    war_fpath = find_or_download_wilbook_warfile()
    war_fname = basename(war_fpath)
    wb_target = splitext(war_fname)[0]

    # Ensure environment variables
    #os.environ['JAVA_HOME'] = find_java_jvm()
    #os.environ['TOMCAT_HOME'] = tomcat_dpath
    #os.environ['CATALINA_HOME'] = tomcat_dpath

    # Move the war file to tomcat webapps if not there
    webapps_dpath = join(tomcat_dpath, 'webapps')
    deploy_war_fpath = join(webapps_dpath, war_fname)
    if not ut.checkpath(deploy_war_fpath, verbose=verbose):
        ut.copy(war_fpath, deploy_war_fpath)

    # Ensure that the war file has been unpacked

    unpacked_war_dpath = join(webapps_dpath, wb_target)
    if not ut.checkpath(unpacked_war_dpath, verbose=verbose):
        # Need to make sure you start catalina in the same directory otherwise
        # the derby database gets put in the cwd
        tomcat_startup_dir = get_tomcat_startup_tmpdir()
        with ut.ChdirContext(tomcat_startup_dir):
            # Starting and stopping catalina should be sufficient to unpack the
            # war
            startup_fpath  = join(tomcat_dpath, 'bin', 'startup.sh')
            shutdown_fpath = join(tomcat_dpath, 'bin', 'shutdown.sh')
            ut.cmd(ut.quote_single_command(startup_fpath))
            print('It is NOT ok if the startup.sh fails\n')

            # wait for the war to be unpacked
            for retry_count in range(0, 6):
                time.sleep(1)
                if ut.checkpath(unpacked_war_dpath, verbose=True):
                    break
                else:
                    print('Retrying')

            # ensure that the server is running
            import requests
            print('Checking if we can ping the server')
            response = requests.get('http://localhost:8080')
            if response is None or response.status_code != 200:
                print('There may be an error starting the server')
            else:
                print('Seem able to ping the server')

            # assert that the war was unpacked
            ut.assertpath(unpacked_war_dpath, (
                'Wildbook war might have not unpacked correctly.  This may '
                'be ok. Try again. If it fails a second time, then there is a '
                'problem.'), verbose=True)

            # shutdown the server
            ut.cmd(ut.quote_single_command(shutdown_fpath))
            print('It is ok if the shutdown.sh fails')
            time.sleep(.5)

    # Make sure permissions are correctly set in wildbook
    # Comment out the line that requires authentication
    permission_fpath = join(unpacked_war_dpath, 'WEB-INF/web.xml')
    ut.assertpath(permission_fpath)
    permission_text = ut.readfrom(permission_fpath)
    lines_to_remove = [
        '/EncounterSetMarkedIndividual = authc, roles[admin]'
    ]
    new_permission_text = permission_text[:]
    for line in lines_to_remove:
        re.search(re.escape(line), permission_text)
        prefix = ut.named_field('prefix', '\\s*')
        suffix = ut.named_field('suffix', '\\s*\n')
        pattern = ('^' + prefix + re.escape(line) + suffix)
        match = re.search(pattern, permission_text,
                          flags=re.MULTILINE | re.DOTALL)
        if match is None:
            continue
        newline = '<!--%s -->' % (line,)
        repl = ut.bref_field('prefix') + newline + ut.bref_field('suffix')
        new_permission_text = re.sub(pattern, repl, permission_text,
                                     flags=re.MULTILINE | re.DOTALL)
        assert new_permission_text != permission_text, (
            'text should have changed')
    if new_permission_text != permission_text:
        print('Need to write new permission texts')
        ut.writeto(permission_fpath, new_permission_text)
    else:
        print('Permission file seems to be ok')

    print('Wildbook is installed and waiting to be started')
Example #14
def total_purge_developed_repo(repodir):
    r"""
    Outputs commands to help purge a repo

    Args:
        repodir (str): path to developed repository

    CommandLine:
        python -m utool.util_sysreq total_purge_developed_repo --show

    Ignore:
        repodir = ut.truepath('~/code/Lasagne')

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_sysreq import *  # NOQA
        >>> import utool as ut
        >>> repodir = ut.get_argval('--repodir', default=None)
        >>> result = total_purge_developed_repo(repodir)
    """
    assert repodir is not None
    import utool as ut
    import os
    repo = ut.util_git.Repo(dpath=repodir)

    user = os.environ['USER']

    fmtdict = dict(
        user=user,
        modname=repo.modname,
        reponame=repo.reponame,
        dpath=repo.dpath,
        global_site_pkgs=ut.get_global_dist_packages_dir(),
        local_site_pkgs=ut.get_local_dist_packages_dir(),
        venv_site_pkgs=ut.get_site_packages_dir(),
    )

    commands = [
        _.format(**fmtdict) for _ in [
            'pip uninstall {modname}',
            'sudo -H pip uninstall {modname}',
            'sudo pip uninstall {modname}',
            'easy_install -m {modname}',
            'cd {dpath} && python setup.py develop --uninstall',
            # If they still exist try chowning to current user
            'sudo chown -R {user}:{user} {dpath}',
        ]
    ]
    print('Normal uninstall commands')
    print('\n'.join(commands))

    possible_link_paths = [
        _.format(**fmtdict) for _ in [
            '{dpath}/{modname}.egg-info',
            '{dpath}/build',
            '{venv_site_pkgs}/{reponame}.egg-info',
            '{local_site_pkgs}/{reponame}.egg-info',
            '{venv_site_pkgs}/{reponame}.egg-info',
        ]
    ]
    from os.path import exists, basename
    existing_link_paths = [path for path in possible_link_paths]
    print('# Delete paths and eggs')
    for path in existing_link_paths:
        if exists(path):
            if ut.get_file_info(path)['owner'] != user:
                print('sudo /bin/rm -rf {path}'.format(path=path))
            else:
                print('/bin/rm -rf {path}'.format(path=path))
        #ut.delete(path)

    print('# Make sure nothing is in the easy install paths')
    easyinstall_paths = [
        _.format(**fmtdict) for _ in [
            '{venv_site_pkgs}/easy-install.pth',
            '{local_site_pkgs}/easy-install.pth',
            '{venv_site_pkgs}/easy-install.pth',
        ]
    ]
    for path in easyinstall_paths:
        if exists(path):
            easy_install_list = ut.readfrom(path,
                                            verbose=False).strip().split('\n')
            easy_install_list_ = [basename(p) for p in easy_install_list]
            index1 = ut.listfind(easy_install_list_, repo.reponame)
            index2 = ut.listfind(easy_install_list_, repo.modname)
            if index1 is not None or index2 is not None:
                print('Found at index1=%r, index2=%r' % (index1, index2))
                if ut.get_file_info(path)['owner'] != user:
                    print('sudo gvim {path}'.format(path=path))
                else:
                    print('gvim {path}'.format(path=path))

    checkcmds = [
        _.format(**fmtdict)
        for _ in ['python -c "import {modname}; print({modname}.__file__)"']
    ]
    import sys
    assert repo.modname not in sys.modules
    print("# CHECK STATUS")
    for cmd in checkcmds:
        print(cmd)
Example #15
def total_purge_developed_repo(repodir):
    r"""
    Outputs commands to help purge a repo

    Args:
        repodir (str): path to developed repository

    CommandLine:
        python -m utool.util_sysreq total_purge_developed_repo --show

    Ignore:
        repodir = ut.truepath('~/code/Lasagne')

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_sysreq import *  # NOQA
        >>> import utool as ut
        >>> repodir = ut.get_argval('--repodir', default=None)
        >>> result = total_purge_developed_repo(repodir)
    """
    assert repodir is not None
    import utool as ut
    import os
    repo = ut.util_git.Repo(dpath=repodir)

    user = os.environ['USER']

    fmtdict = dict(
        user=user,
        modname=repo.modname,
        reponame=repo.reponame,
        dpath=repo.dpath,
        global_site_pkgs=ut.get_global_dist_packages_dir(),
        local_site_pkgs=ut.get_local_dist_packages_dir(),
        venv_site_pkgs=ut.get_site_packages_dir(),
    )

    commands = [_.format(**fmtdict) for _ in [
        'pip uninstall {modname}',
        'sudo -H pip uninstall {modname}',
        'sudo pip uninstall {modname}',
        'easy_install -m {modname}',
        'cd {dpath} && python setup.py develop --uninstall',
        # If they still exist try chowning to current user
        'sudo chown -R {user}:{user} {dpath}',
    ]]
    print('Normal uninstall commands')
    print('\n'.join(commands))

    possible_link_paths = [_.format(**fmtdict) for _ in [
        '{dpath}/{modname}.egg-info',
        '{dpath}/build',
        '{venv_site_pkgs}/{reponame}.egg-info',
        '{local_site_pkgs}/{reponame}.egg-info',
        '{venv_site_pkgs}/{reponame}.egg-info',
    ]]
    from os.path import exists, basename
    existing_link_paths = [path for path in possible_link_paths]
    print('# Delete paths and eggs')
    for path in existing_link_paths:
        if exists(path):
            if ut.get_file_info(path)['owner'] != user:
                print('sudo /bin/rm -rf {path}'.format(path=path))
            else:
                print('/bin/rm -rf {path}'.format(path=path))
        #ut.delete(path)

    print('# Make sure nothing is in the easy install paths')
    easyinstall_paths = [_.format(**fmtdict) for _ in [
        '{venv_site_pkgs}/easy-install.pth',
        '{local_site_pkgs}/easy-install.pth',
        '{venv_site_pkgs}/easy-install.pth',
    ]]
    for path in easyinstall_paths:
        if exists(path):
            easy_install_list = ut.readfrom(path, verbose=False).strip().split('\n')
            easy_install_list_ = [basename(p) for p in easy_install_list]
            index1 = ut.listfind(easy_install_list_, repo.reponame)
            index2 = ut.listfind(easy_install_list_, repo.modname)
            if index1 is not None or index2 is not None:
                print('Found at index1=%r, index2=%r' % (index1, index2))
                if ut.get_file_info(path)['owner'] != user:
                    print('sudo gvim {path}'.format(path=path))
                else:
                    print('gvim {path}'.format(path=path))

    checkcmds = [_.format(**fmtdict) for _ in [
        'python -c "import {modname}; print({modname}.__file__)"'
    ]]
    import sys
    assert repo.modname not in sys.modules
    print("# CHECK STATUS")
    for cmd in checkcmds:
        print(cmd)
Example #16
        'graph-id.tex',
        'appendix.tex',
        'main.tex',
        'graph_id.tex',
    ]
    exclude_dirs = ['guts']
    tex_fpath_list = sorted(
        ut.glob(dpath, patterns, recursive=True, exclude_dirs=exclude_dirs)
    )
    tex_fpath_list = ut.get_argval('--fpaths', type_=list, default=tex_fpath_list)
    return tex_fpath_list

fpaths = testdata_fpaths()

fpath = 'main.tex'
text = ut.readfrom(fpath)
root = latex_parser.LatexDocPart.parse_text(text, debug=None)

# root._config['asmarkdown'] = True
# root._config['numlines'] = float('inf')

commands = list(root.find_descendant_types('newcommand'))

figcommands = []
for self in commands:
    if self.fpath_root() in {'colordef.tex', 'def.tex', 'CrallDef.tex'}:
        continue
    figcommands.append(self)

cmd_to_fpaths = ut.ddict(list)
for self in figcommands:
Example #17
def update_wildbook_install_config(webapps_dpath, unpacked_war_dpath):
    """
    CommandLine:
        python -m ibeis ensure_local_war
        python -m ibeis update_wildbook_install_config
        python -m ibeis update_wildbook_install_config --show

    Example:
        >>> from ibeis.control.wildbook_manager import *  # NOQA
        >>> import ibeis
        >>> tomcat_dpath = find_installed_tomcat()
        >>> webapps_dpath = join(tomcat_dpath, 'webapps')
        >>> wb_target = ibeis.const.WILDBOOK_TARGET
        >>> unpacked_war_dpath = join(webapps_dpath, wb_target)
        >>> locals_ = ut.exec_func_src(update_wildbook_install_config, globals())
        >>> #update_wildbook_install_config(webapps_dpath, unpacked_war_dpath)
        >>> ut.quit_if_noshow()
        >>> ut.vd(unpacked_war_dpath)
        >>> ut.editfile(locals_['permission_fpath'])
        >>> ut.editfile(locals_['jdoconfig_fpath'])
        >>> ut.editfile(locals_['asset_store_fpath'])
    """
    mysql_mode = not ut.get_argflag('--nomysql')

    #if ut.get_argflag('--vd'):
    #    ut.vd(unpacked_war_dpath)
    #find_installed_tomcat
    # Make sure permissions are correctly set in wildbook
    # Comment out the line that requires authentication
    permission_fpath = join(unpacked_war_dpath, 'WEB-INF/web.xml')
    ut.assertpath(permission_fpath)
    permission_text = ut.readfrom(permission_fpath)
    lines_to_remove = [
        # '/ImageSetSetMarkedIndividual = authc, roles[admin]'
        '/EncounterSetMarkedIndividual = authc, roles[admin]'
    ]
    new_permission_text = permission_text[:]
    for line in lines_to_remove:
        re.search(re.escape(line), permission_text)
        prefix = ut.named_field('prefix', '\\s*')
        suffix = ut.named_field('suffix', '\\s*\n')
        pattern = ('^' + prefix + re.escape(line) + suffix)
        match = re.search(pattern,
                          permission_text,
                          flags=re.MULTILINE | re.DOTALL)
        if match is None:
            continue
        newline = '<!--%s -->' % (line, )
        repl = ut.bref_field('prefix') + newline + ut.bref_field('suffix')
        new_permission_text = re.sub(pattern,
                                     repl,
                                     permission_text,
                                     flags=re.MULTILINE | re.DOTALL)
        assert new_permission_text != permission_text, (
            'text should have changed')
    if new_permission_text != permission_text:
        print('Need to write new permission texts')
        ut.writeto(permission_fpath, new_permission_text)
    else:
        print('Permission file seems to be ok')

    # Make sure we are using a non-process based database
    jdoconfig_fpath = join(unpacked_war_dpath,
                           'WEB-INF/classes/bundles/jdoconfig.properties')
    print('Fixing backend database config')
    print('jdoconfig_fpath = %r' % (jdoconfig_fpath, ))
    ut.assertpath(jdoconfig_fpath)
    jdoconfig_text = ut.readfrom(jdoconfig_fpath)
    #ut.vd(dirname(jdoconfig_fpath))
    #ut.editfile(jdoconfig_fpath)

    if mysql_mode:
        jdoconfig_text = ut.toggle_comment_lines(jdoconfig_text, 'mysql',
                                                 False)
        jdoconfig_text = ut.toggle_comment_lines(jdoconfig_text, 'derby', 1)
        jdoconfig_text = ut.toggle_comment_lines(jdoconfig_text, 'sqlite', 1)
        mysql_user = '******'
        mysql_passwd = 'somepassword'
        mysql_dbname = 'ibeiswbtestdb'
        # Use mysql
        jdoconfig_text = re.sub('datanucleus.ConnectionUserName = .*$',
                                'datanucleus.ConnectionUserName = ' + mysql_user,
                                jdoconfig_text,
                                flags=re.MULTILINE)
        jdoconfig_text = re.sub('datanucleus.ConnectionPassword = .*$',
                                'datanucleus.ConnectionPassword = ' + mysql_passwd,
                                jdoconfig_text,
                                flags=re.MULTILINE)
        jdoconfig_text = re.sub('datanucleus.ConnectionURL *= *jdbc:mysql:.*$',
                                'datanucleus.ConnectionURL = jdbc:mysql://localhost:3306/' +
                                mysql_dbname,
                                jdoconfig_text,
                                flags=re.MULTILINE)
        jdoconfig_text = re.sub('^.*jdbc:mysql://localhost:3306/shepherd.*$',
                                '',
                                jdoconfig_text,
                                flags=re.MULTILINE)
    else:
        # Use SQLite
        jdoconfig_text = ut.toggle_comment_lines(jdoconfig_text, 'derby', 1)
        jdoconfig_text = ut.toggle_comment_lines(jdoconfig_text, 'mysql', 1)
        jdoconfig_text = ut.toggle_comment_lines(jdoconfig_text, 'sqlite',
                                                 False)
    ut.writeto(jdoconfig_fpath, jdoconfig_text)

    # Need to make sure wildbook can store information in a reasonable place
    #tomcat_data_dir = join(tomcat_startup_dir, 'webapps', 'wildbook_data_dir')
    tomcat_data_dir = join(webapps_dpath, 'wildbook_data_dir')
    ut.ensuredir(tomcat_data_dir)
    ut.writeto(join(tomcat_data_dir, 'test.txt'), 'A hosted test file')
    asset_store_fpath = join(unpacked_war_dpath, 'createAssetStore.jsp')
    asset_store_text = ut.read_from(asset_store_fpath)
    #data_path_pat = ut.named_field('data_path', 'new File(".*?").toPath')
    new_line = 'LocalAssetStore as = new LocalAssetStore("example Local AssetStore", new File("%s").toPath(), "%s", true);' % (
        tomcat_data_dir, 'http://localhost:8080/' + basename(tomcat_data_dir))
    # HACKY
    asset_store_text2 = re.sub('^LocalAssetStore as = .*$',
                               new_line,
                               asset_store_text,
                               flags=re.MULTILINE)
    ut.writeto(asset_store_fpath, asset_store_text2)
Example #18
    def fix_chktex():
        """
        ./texfix.py --fixcite --fix-chktex
        """
        import parse
        fpaths = testdata_fpaths()
        print('Running chktex')
        output_list = [
            ut.cmd('chktex', fpath, verbose=False)[0] for fpath in fpaths
        ]

        fixcite = ut.get_argflag('--fixcite')
        fixlbl = ut.get_argflag('--fixlbl')
        fixcmdterm = ut.get_argflag('--fixcmdterm')

        for fpath, output in zip(fpaths, output_list):
            text = ut.readfrom(fpath)
            buffer = text.split('\n')
            pat = '\n' + ut.positive_lookahead('Warning')
            warn_list = list(
                filter(lambda x: x.startswith('Warning'),
                       re.split(pat, output)))
            delete_linenos = []

            if not (fixcmdterm or fixlbl or fixcite):
                print(' CHOOSE A FIX ')

            modified_lines = []

            for warn in warn_list:
                warnlines = warn.split('\n')
                pres = parse.parse(
                    'Warning {num} in {fpath} line {lineno}: {warnmsg}',
                    warnlines[0])
                if pres is not None:
                    fpath_ = pres['fpath']
                    lineno = int(pres['lineno']) - 1
                    warnmsg = pres['warnmsg']
                    try:
                        assert fpath == fpath_, ('%r != %r' % (fpath, fpath_))
                    except AssertionError:
                        continue
                    if 'No errors printed' in warn:
                        #print('Cannot fix')
                        continue
                    if lineno in modified_lines:
                        print('Skipping modified line')
                        continue
                    if fixcmdterm and warnmsg == 'Command terminated with space.':
                        print('Fix command termination')
                        errorline = warnlines[1]  # NOQA
                        carrotline = warnlines[2]
                        pos = carrotline.find('^')
                        if 0:
                            print('pos = %r' % (pos, ))
                            print('lineno = %r' % (lineno, ))
                            print('errorline = %r' % (errorline, ))
                        modified_lines.append(lineno)
                        line = buffer[lineno]
                        pre_, post_ = line[:pos], line[pos + 1:]
                        newline = (pre_ + '{} ' + post_).rstrip(' ')
                        #print('newline   = %r' % (newline,))
                        buffer[lineno] = newline
                    elif fixlbl and warnmsg == 'Delete this space to maintain correct pagereferences.':
                        print('Fix label newline')
                        fpath_ = pres['fpath']
                        errorline = warnlines[1]  # NOQA
                        new_prevline = buffer[
                            lineno - 1].rstrip() + errorline.lstrip(' ')
                        buffer[lineno - 1] = new_prevline
                        modified_lines.append(lineno)
                        delete_linenos.append(lineno)
                    elif fixcite and re.match(
                            'Non-breaking space \\(.~.\\) should have been used',
                            warnmsg):
                        #print(warnmsg)
                        #print('\n'.join(warnlines))
                        print('Fix citation space')
                        carrotline = warnlines[2]
                        pos = carrotline.find('^')
                        modified_lines.append(lineno)
                        line = buffer[lineno]
                        if line[pos] == ' ':
                            pre_, post_ = line[:pos], line[pos + 1:]
                            newline = (pre_ + '~' + post_).rstrip(' ')
                        else:
                            pre_, post_ = line[:pos + 1], line[pos + 1:]
                            newline = (pre_ + '~' + post_).rstrip(' ')
                            print(warn)
                            print(line[pos])
                            assert False
                            #assert line[pos] == ' ', '%r' % line[pos]
                            break
                        if len(pre_.strip()) == 0:
                            new_prevline = buffer[
                                lineno - 1].rstrip() + newline.lstrip(' ')
                            buffer[lineno - 1] = new_prevline
                            delete_linenos.append(lineno)
                        else:
                            #print('newline   = %r' % (newline,))
                            buffer[lineno] = newline
                    #print(warn)

            if len(delete_linenos) > 0:
                mask = ut.index_to_boolmask(delete_linenos, len(buffer))
                buffer = ut.compress(buffer, ut.not_list(mask))
            newtext = '\n'.join(buffer)

            #ut.dump_autogen_code(fpath, newtext, 'tex', fullprint=False)
            ut.print_difftext(
                ut.get_textdiff(text, newtext, num_context_lines=4))
            if ut.get_argflag('-w'):
                ut.writeto(fpath, newtext)
            else:
                print('Specify -w to finalize change')
Example #19
def load_oxford_2007():
    """
    Loads data from
    http://www.robots.ox.ac.uk:5000/~vgg/publications/2007/Philbin07/philbin07.pdf

    >>> from wbia.algo.smk.script_smk import *  # NOQA
    """
    from os.path import join, basename, splitext
    import pandas as pd
    import vtool as vt

    dbdir = ut.truepath('/raid/work/Oxford/')
    data_fpath0 = join(dbdir, 'data_2007.pkl')

    if ut.checkpath(data_fpath0):
        data = ut.load_data(data_fpath0)
        return data
    else:
        word_dpath = join(dbdir, 'word_oxc1_hesaff_sift_16M_1M')
        _word_fpath_list = ut.ls(word_dpath)
        imgid_to_word_fpath = {
            splitext(basename(word_fpath))[0]: word_fpath
            for word_fpath in _word_fpath_list
        }
        readme_fpath = join(dbdir, 'README2.txt')
        imgid_order = ut.readfrom(readme_fpath).split('\n')[20:-1]

        data_uri_order = [x.replace('oxc1_', '') for x in imgid_order]

        imgid_to_df = {}
        for imgid in ut.ProgIter(imgid_order, label='reading kpts'):
            word_fpath = imgid_to_word_fpath[imgid]
            row_gen = (map(float,
                           line.strip('\n').split(' '))
                       for line in ut.read_lines_from(word_fpath)[2:])
            rows = [(int(word_id), x, y, e11, e12, e22)
                    for (word_id, x, y, e11, e12, e22) in row_gen]
            df = pd.DataFrame(
                rows, columns=['word_id', 'x', 'y', 'e11', 'e12', 'e22'])
            imgid_to_df[imgid] = df

        df_list = ut.take(imgid_to_df, imgid_order)

        nfeat_list = [len(df_) for df_ in df_list]
        offset_list = [0] + ut.cumsum(nfeat_list)
        shape = (offset_list[-1], 128)
        # shape = (16334970, 128)
        sift_fpath = join(dbdir, 'OxfordSIFTDescriptors',
                          'feat_oxc1_hesaff_sift.bin')
        try:
            file_ = open(sift_fpath, 'rb')
            with ut.Timer('Reading SIFT binary file'):
                nbytes = np.prod(shape)
                all_vecs = np.frombuffer(file_.read(nbytes), dtype=np.uint8)
            all_vecs = all_vecs.reshape(shape)
        finally:
            file_.close()

        kpts_list = [
            df_.loc[:, ('x', 'y', 'e11', 'e12', 'e22')].values
            for df_ in df_list
        ]
        wordid_list = [df_.loc[:, 'word_id'].values for df_ in df_list]
        kpts_Z = np.vstack(kpts_list)
        idx_to_wx = np.hstack(wordid_list)

        # assert len(np.unique(idx_to_wx)) == 1E6

        # Reqd standard query order
        query_files = sorted(
            ut.glob(dbdir + '/oxford_groundtruth', '*_query.txt'))
        query_uri_order = []
        for qpath in query_files:
            text = ut.readfrom(qpath, verbose=0)
            query_uri = text.split(' ')[0].replace('oxc1_', '')
            query_uri_order.append(query_uri)

        logger.info('converting to invV')
        all_kpts = vt.convert_kptsZ_to_kpts(kpts_Z)

        data = {
            'offset_list': offset_list,
            'all_kpts': all_kpts,
            'all_vecs': all_vecs,
            'idx_to_wx': idx_to_wx,
            'data_uri_order': data_uri_order,
            'query_uri_order': query_uri_order,
        }
        ut.save_data(data_fpath0, data)
    return data
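
A minimal usage sketch (assumes the hard-coded /raid/work/Oxford layout exists): the first call parses everything and caches it to data_2007.pkl; later calls load the cached pickle directly:

    data = load_oxford_2007()
    print(data['all_vecs'].shape)  # (num_feats, 128)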
Example #20
def update_wildbook_install_config(webapps_dpath, unpacked_war_dpath):
    """
    CommandLine:
        python -m ibeis ensure_local_war
        python -m ibeis update_wildbook_install_config
        python -m ibeis update_wildbook_install_config --show

    Example:
        >>> from ibeis.control.wildbook_manager import *  # NOQA
        >>> import ibeis
        >>> tomcat_dpath = find_installed_tomcat()
        >>> webapps_dpath = join(tomcat_dpath, 'webapps')
        >>> wb_target = ibeis.const.WILDBOOK_TARGET
        >>> unpacked_war_dpath = join(webapps_dpath, wb_target)
        >>> locals_ = ut.exec_func_src(update_wildbook_install_config, globals())
        >>> #update_wildbook_install_config(webapps_dpath, unpacked_war_dpath)
        >>> ut.quit_if_noshow()
        >>> ut.vd(unpacked_war_dpath)
        >>> ut.editfile(locals_['permission_fpath'])
        >>> ut.editfile(locals_['jdoconfig_fpath'])
        >>> ut.editfile(locals_['asset_store_fpath'])
    """
    mysql_mode = not ut.get_argflag('--nomysql')

    #if ut.get_argflag('--vd'):
    #    ut.vd(unpacked_war_dpath)
    #find_installed_tomcat
    # Make sure permissions are correctly set in wildbook
    # Comment out the line that requires authentication
    permission_fpath = join(unpacked_war_dpath, 'WEB-INF/web.xml')
    ut.assertpath(permission_fpath)
    permission_text = ut.readfrom(permission_fpath)
    lines_to_remove = [
        # '/ImageSetSetMarkedIndividual = authc, roles[admin]'
        '/EncounterSetMarkedIndividual = authc, roles[admin]'
    ]
    new_permission_text = permission_text[:]
    for line in lines_to_remove:
        re.search(re.escape(line), permission_text)
        prefix = ut.named_field('prefix', '\\s*')
        suffix = ut.named_field('suffix', '\\s*\n')
        pattern = ('^' + prefix + re.escape(line) + suffix)
        match = re.search(pattern, permission_text,
                          flags=re.MULTILINE | re.DOTALL)
        if match is None:
            continue
        newline = '<!--%s -->' % (line,)
        repl = ut.bref_field('prefix') + newline + ut.bref_field('suffix')
        new_permission_text = re.sub(pattern, repl, permission_text,
                                     flags=re.MULTILINE | re.DOTALL)
        assert new_permission_text != permission_text, (
            'text should have changed')
    if new_permission_text != permission_text:
        print('Need to write new permission texts')
        ut.writeto(permission_fpath, new_permission_text)
    else:
        print('Permission file seems to be ok')

    # Make sure we are using a non-process based database
    jdoconfig_fpath = join(unpacked_war_dpath,
                           'WEB-INF/classes/bundles/jdoconfig.properties')
    print('Fixing backend database config')
    print('jdoconfig_fpath = %r' % (jdoconfig_fpath,))
    ut.assertpath(jdoconfig_fpath)
    jdoconfig_text = ut.readfrom(jdoconfig_fpath)
    #ut.vd(dirname(jdoconfig_fpath))
    #ut.editfile(jdoconfig_fpath)

    if mysql_mode:
        jdoconfig_text = ut.toggle_comment_lines(jdoconfig_text, 'mysql', False)
        jdoconfig_text = ut.toggle_comment_lines(jdoconfig_text, 'derby', 1)
        jdoconfig_text = ut.toggle_comment_lines(jdoconfig_text, 'sqlite', 1)
        mysql_user = '******'
        mysql_passwd = 'somepassword'
        mysql_dbname = 'ibeiswbtestdb'
        # Use mysql
        jdoconfig_text = re.sub(
            'datanucleus.ConnectionUserName = .*$',
            'datanucleus.ConnectionUserName = ' + mysql_user,
            jdoconfig_text, flags=re.MULTILINE)
        jdoconfig_text = re.sub(
            'datanucleus.ConnectionPassword = .*$',
            'datanucleus.ConnectionPassword = ' + mysql_passwd,
            jdoconfig_text, flags=re.MULTILINE)
        jdoconfig_text = re.sub(
            'datanucleus.ConnectionURL *= *jdbc:mysql:.*$',
            'datanucleus.ConnectionURL = jdbc:mysql://localhost:3306/' + mysql_dbname,
            jdoconfig_text, flags=re.MULTILINE)
        jdoconfig_text = re.sub(
            '^.*jdbc:mysql://localhost:3306/shepherd.*$', '',
            jdoconfig_text, flags=re.MULTILINE)
    else:
        # Use SQLite
        jdoconfig_text = ut.toggle_comment_lines(jdoconfig_text, 'derby', 1)
        jdoconfig_text = ut.toggle_comment_lines(jdoconfig_text, 'mysql', 1)
        jdoconfig_text = ut.toggle_comment_lines(jdoconfig_text, 'sqlite', False)
    ut.writeto(jdoconfig_fpath, jdoconfig_text)

    # Need to make sure wildbook can store information in a reasonable place
    #tomcat_data_dir = join(tomcat_startup_dir, 'webapps', 'wildbook_data_dir')
    tomcat_data_dir = join(webapps_dpath, 'wildbook_data_dir')
    ut.ensuredir(tomcat_data_dir)
    ut.writeto(join(tomcat_data_dir, 'test.txt'), 'A hosted test file')
    asset_store_fpath = join(unpacked_war_dpath, 'createAssetStore.jsp')
    asset_store_text = ut.read_from(asset_store_fpath)
    #data_path_pat = ut.named_field('data_path', 'new File(".*?").toPath')
    new_line = 'LocalAssetStore as = new LocalAssetStore("example Local AssetStore", new File("%s").toPath(), "%s", true);' % (
        tomcat_data_dir,
        'http://localhost:8080/' + basename(tomcat_data_dir)
    )
    # HACKY
    asset_store_text2 = re.sub('^LocalAssetStore as = .*$', new_line, asset_store_text, flags=re.MULTILINE)
    ut.writeto(asset_store_fpath, asset_store_text2)