コード例 #1
0
ファイル: bibtex.py プロジェクト: pjhaest/publish
def _check_paper(paper):
    """Report every required attribute that the given paper lacks.

    The required attributes are looked up in the "entrytype_attributes"
    configuration under the paper's entry type. A requirement given as a
    tuple is satisfied when at least one of its fields is present.
    When something is missing, ``paper["invalid"]`` is set to True and,
    unless "autofix" is configured, the user is asked to press return.
    """

    # TODO: Do this during parsing, so we can give error messages with line number and text

    print("Found paper: %s" % pstr(paper))

    entry_type = paper["entrytype"]
    key = paper["key"]
    requirements = config.get("entrytype_attributes")[entry_type]

    problems_found = False
    for requirement in requirements:
        if isinstance(requirement, tuple):
            # A tuple lists alternatives: one present field is enough
            if any(field in paper for field in requirement):
                continue
            print('  Missing required attribute(s) "%s" for paper "%s"' %
                  ('"/"'.join(requirement), key))
        elif requirement in paper:
            continue
        else:
            print('  Missing required attribute "%s" for paper "%s"' %
                  (requirement, key))
        problems_found = True

    if not problems_found:
        return

    paper["invalid"] = True
    print(
        "  Skipping paper. Correct the above error(s) and import the paper again."
    )
    if not config.get("autofix"):
        input("  Press return to continue.")
コード例 #2
0
def print_summary(papers, num_found=0, num_missing=0):
    """Print a summary with the number of papers in each category.

    papers      -- sequence of paper dictionaries, each with a "category" key
    num_found   -- number of papers for which a PDF file was found
    num_missing -- number of papers for which no PDF file was found

    The PDF statistics are only printed when at least one of the two
    counters is non-zero. Categories and their headings are read from the
    "categories" and "category_headings" configuration values.
    """

    print("")
    print("Summary of papers")
    print("-----------------")
    print("")

    if not (num_found == 0 and num_missing == 0):
        print("Database has %d paper(s)." % len(papers))
        print("PDF files found for %d paper(s), %d missing." % (num_found, num_missing))
        print("")

    headings = config.get("category_headings")
    categories = config.get("categories")

    # Column width for aligning the counts. Including the "Total" label
    # keeps the last row aligned and makes max() safe when no categories
    # are configured.
    total_label = "Total"
    width = max([len(headings[category]) for category in categories] +
                [len(total_label)])

    # Count number of papers in each category
    for category in categories:
        heading = headings[category]
        num_papers = sum(1 for paper in papers if paper["category"] == category)
        print("%s: %s%d" % (heading, " " * (width - len(heading)), num_papers))
    print("%s: %s%d" % (total_label, " " * (width - len(total_label)), len(papers)))
コード例 #3
0
ファイル: validation.py プロジェクト: pjhaest/publish
def _validate_paper_categories(paper):
    """Validate that the paper has every attribute its category requires.

    The required attributes are read from the "category_attributes"
    configuration. A requirement given as a tuple is satisfied when at
    least one of its members is present in the paper. On the first missing
    requirement ``paper["invalid"]`` is set to True and the check stops.

    Raises RuntimeError when the paper has no "category" attribute.
    """

    # Check that category is specified
    if "category" not in paper:
        raise RuntimeError("Unable to validate paper, unknown category.")

    # Check that the paper holds all required attributes
    category = paper["category"]
    category_attributes = config.get("category_attributes")

    missing = None
    for attribute in category_attributes[category]:
        if isinstance(attribute, tuple):
            present = any(a in paper for a in attribute)
        else:
            present = attribute in paper
        if not present:
            paper["invalid"] = True
            missing = str(attribute)
            break

    # Nothing is missing here. The paper may still have been flagged by an
    # earlier check, but there is no attribute to report in that case.
    if missing is None:
        return

    if not is_valid(paper):
        print('  Skipping paper (missing attribute "%s")' % missing)
        if not config.get("autofix"):
            input("  Press return to continue.")
コード例 #4
0
ファイル: formatting.py プロジェクト: pjhaest/publish
def html_format_articles(paper):
    """Return the HTML string for a journal article.

    Title, authors, journal and year are always emitted; volume, pages,
    DOI link and arXiv link are added only when the paper has them.
    """
    prefix = config.get("html_class_prefix")

    # Mandatory leading fields: title, author, journal
    parts = [
        _html_format_title(paper),
        _html_get_authors_string(paper["author"]),
        '<span class="%s_item_journal">%s</span>' %
        (prefix, _format_venue(paper["journal"], paper["journal"], paper)),
    ]

    # Optional volume and pages
    if "volume" in paper:
        parts.append('<span class="%s_item_volum">vol. %s</span>' %
                     (prefix, paper["volume"]))
    if "pages" in paper:
        parts.append('<span class="%s_item_pages">pp. %s</span>' %
                     (prefix, _html_format_pages(paper["pages"])))

    # Year
    parts.append('<span class="%s_item_year">%s</span>' % (prefix, paper["year"]))

    # Optional links (the link text is kept short: only "DOI" / "arXiv")
    if "doi" in paper:
        parts.append('[<a href="http://dx.doi.org/%s">DOI</a>]' % paper["doi"])
    if "arxiv" in paper:
        parts.append('[<a href="http://arxiv.org/abs/%s">arXiv</a>]' % paper["arxiv"])

    return _html_join(parts)
コード例 #5
0
ファイル: validation.py プロジェクト: pjhaest/publish
def _validate_paper_title(paper):
    """Fix the capitalization of the paper title in place.

    The title is processed twice, first split on spaces and then on
    hyphens. Each non-empty piece is lowercased if it is listed in the
    "lowercase" configuration, replaced by its entry in the "uppercase"
    configuration if listed there, and otherwise gets its first character
    uppercased. Empty pieces are dropped.
    """
    title = paper["title"]
    lowercase = config.get("lowercase")
    uppercase = config.get("uppercase")

    def _fix_case(word):
        "Return the word with corrected capitalization"
        folded = word.lower()
        if folded in lowercase:
            return folded
        if folded in uppercase:
            return uppercase[folded]
        return word[0].upper() + word[1:]

    for separator in (" ", "-"):
        pieces = title.split(separator)
        title = separator.join(_fix_case(piece) for piece in pieces if piece != "")

    paper["title"] = title
コード例 #6
0
ファイル: validation.py プロジェクト: pjhaest/publish
def _validate_paper_typos(paper):
    """Check all string values of the paper for known typos.

    The typo table comes from the "typos" configuration: the entries under
    "common" apply to every attribute, and entries under an attribute's own
    name are added on top (overriding common ones). Each entry maps a typo
    to its replacement, or to None when no replacement is known.

    A typo with a replacement is corrected in place. A typo without one
    marks the paper invalid (``paper["invalid"] = True``) and stops the
    check. Unless "autofix" is configured, the user is asked to press
    return after each report.
    """

    typos = config.get("typos")

    # Check all attributes
    for attribute in paper:

        # Extract typos to check
        attribute_typos = typos["common"].copy()
        if attribute in typos:
            attribute_typos.update(typos[attribute])

        # Treat a single value as a one-element tuple
        original = paper[attribute]
        was_tuple = isinstance(original, tuple)
        candidates = original if was_tuple else (original, )

        corrected = []
        for value in candidates:

            # Only strings can contain typos; other values (for example the
            # boolean "invalid" flag set by other checks) pass through.
            if isinstance(value, str):
                for typo, replacement in attribute_typos.items():
                    if typo not in value:
                        continue

                    print("  Incorrectly formatted %s string: %s" %
                          (attribute, str(value)))

                    if replacement is None:
                        # Found no replacement, skip paper
                        paper["invalid"] = True
                        if config.get("autofix"):
                            print("  Skipping paper")
                        else:
                            input(
                                "  Skipping paper, press return to continue.")
                        return

                    # Found replacement
                    value = value.replace(typo, replacement)
                    print('  Replacing typo "%s" with "%s".' %
                          (typo, replacement))
                    if not config.get("autofix"):
                        # Actually wait for the user, as the message says
                        input("  Press return to continue.")

            corrected.append(value)

        # Assign corrected value, keeping the original tuple/scalar shape
        paper[attribute] = tuple(corrected) if was_tuple else corrected[0]
コード例 #7
0
ファイル: formatting.py プロジェクト: pjhaest/publish
def html_format_proceedings(paper):
    """Return the HTML string for a proceedings paper.

    Emits title, authors, book title and year, in that order.
    """
    prefix = config.get("html_class_prefix")
    return _html_join([
        _html_format_title(paper),
        _html_get_authors_string(paper["author"]),
        'in <span class="%s_item_booktitle">%s</span>' % (prefix, paper["booktitle"]),
        '<span class="%s_item_year">%s</span>' % (prefix, paper["year"]),
    ])
コード例 #8
0
ファイル: formatting.py プロジェクト: pjhaest/publish
def html_format_edited(paper):
    """Return the HTML string for an edited book.

    Emits title, authors, publisher and year, in that order.
    """
    prefix = config.get("html_class_prefix")
    return _html_join([
        _html_format_title(paper),
        _html_get_authors_string(paper["author"]),
        '<span class="%s_item_publisher">%s</span>' % (prefix, paper["publisher"]),
        '<span class="%s_item_year">%s</span>' % (prefix, paper["year"]),
    ])
コード例 #9
0
ファイル: formatting.py プロジェクト: pjhaest/publish
def html_format_publicoutreach(paper):
    """Return the HTML string for a public outreach item.

    Emits title, authors, meeting and year, in that order.
    """
    prefix = config.get("html_class_prefix")
    return _html_join([
        _html_format_title(paper),
        _html_get_authors_string(paper["author"]),
        '<span class="%s_item_meeting">%s</span>' % (prefix, paper["meeting"]),
        '<span class="%s_item_year">%s</span>' % (prefix, paper["year"]),
    ])
コード例 #10
0
ファイル: formatting.py プロジェクト: pjhaest/publish
def html_format_theses(paper):
    """Return the HTML string for a thesis.

    Emits title, authors, the display string for the thesis type (looked
    up in the "thesistype_strings" configuration), school and year.
    """
    title_html = _html_format_title(paper)
    authors_html = _html_get_authors_string(paper["author"])
    thesis_label = config.get("thesistype_strings")[paper["thesistype"]]
    school = paper["school"]
    year_html = '<span class="%s_item_year">%s</span>' % (
        config.get("html_class_prefix"), paper["year"])

    return _html_join([title_html, authors_html, thesis_label, school, year_html])
コード例 #11
0
ファイル: validation.py プロジェクト: pjhaest/publish
def _add_author(author_name):
    """Add an author name to the allowed names and to the author names file.

    The name is added to the "allowed_author_names" configuration set (so
    it is remembered for the rest of the run) and appended, stripped of
    surrounding whitespace, as a new line to the file named by the
    "authornames_filename" configuration.

    Raises RuntimeError when the file cannot be written.
    """
    allowed_author_names = config.get("allowed_author_names")
    allowed_author_names.add(author_name)

    # Append to file; the context manager closes it even if the write fails
    filename = config.get("authornames_filename")
    try:
        with open(filename, "a") as file:
            file.write(author_name.strip() + "\n")
    except Exception as e:
        raise RuntimeError('Unable to add author to file: "%s".' % filename) from e
コード例 #12
0
ファイル: formatting.py プロジェクト: pjhaest/publish
def html_format_chapters(paper):
    """Return the HTML string for a book chapter.

    Title, authors, book title, publisher and year are always emitted;
    editors, chapter number and pages are added only when present.
    """
    prefix = config.get("html_class_prefix")

    parts = [
        _html_format_title(paper),
        _html_get_authors_string(paper["author"]),
        'in <span class="%s_item_publisher">%s</span>' % (prefix, paper["booktitle"]),
    ]
    if 'editor' in paper:
        parts.append(_html_format_editors(paper["editor"]))
    parts.append('<span class="%s_item_publisher">%s</span>' % (prefix, paper["publisher"]))
    if "chapter" in paper:
        parts.append("chapter %s" % paper["chapter"])
    if "pages" in paper:
        parts.append("pp. %s" % _html_format_pages(paper["pages"]))
    parts.append('<span class="%s_item_year">%s</span>' % (prefix, paper["year"]))

    return _html_join(parts)
コード例 #13
0
ファイル: validation.py プロジェクト: pjhaest/publish
def _validate_paper_pages(paper):
    """Validate the page range of the paper and normalise its separator.

    Nothing is done when the paper has no "pages" attribute. A page string
    is invalid when the "require_page_range" configuration is set and the
    string has no "-", or when either side of the range is empty. An
    invalid string marks the paper invalid (``paper["invalid"] = True``).

    A valid range is rewritten as ``first + page_separator + last`` using
    the "page_separator" configuration. If that differs from the stored
    string, the user is asked whether to accept the correction; declining
    marks the paper invalid.
    """

    # Only check if we have pages
    if "pages" not in paper:
        return
    pages = paper["pages"]

    invalid = False
    new_pages = pages

    # Check if page string must contain "-"
    if config.get("require_page_range") and "-" not in pages:
        invalid = True

    if "-" in pages:
        # Prefer the double dash so that "1--5" is not split into "1" and ""
        separator = "--" if "--" in pages else "-"
        first, last = (part.strip() for part in pages.split(separator)[:2])

        # The same checks apply to both separator styles
        if not first or not last:
            invalid = True

        # Reformat string
        new_pages = first + config.get("page_separator") + last

    # Check for invalid page string
    if invalid:
        paper["invalid"] = True
        print("  Incorrectly formatted page string: " + pages)
        if not config.get("autofix"):
            input("  Skipping paper, press return to continue.")
        else:
            print("  Skipping paper.")
        return

    # Check if string was changed
    if new_pages != pages:
        print("  Incorrectly formatted page string: " + pages)
        print("  Suggested correction:              " + new_pages)
        if ask_user_yesno(
                "  Would you like to accept the suggested correction:"):
            print("  Correcting page string.")
            paper["pages"] = new_pages
        else:
            paper["invalid"] = True
            if not config.get("autofix"):
                input("  Skipping paper, press return to continue.")
            else:
                print("  Skipping paper.")
コード例 #14
0
ファイル: validation.py プロジェクト: pjhaest/publish
def _add_venue(venue_type, venue_name):
    """Add a venue to the known venues and to the local venues file.

    venue_type -- kind of venue, e.g. "journal"; the list of known venues
                  is the configuration value named ``venue_type + "s"``
    venue_name -- name of the venue to add

    The venue is appended to the in-memory list (remembered at run-time)
    and written as a "type: name" line to the file named by the
    "local_venues_filename" configuration.

    Raises RuntimeError when the file cannot be written.
    """

    # Append to list of known venues (remember at run-time)
    known_venues = config.get(venue_type + "s")
    known_venues.append(venue_name)

    # Append to file; the context manager closes it even if the write fails
    filename = config.get("local_venues_filename")
    try:
        with open(filename, "a") as file:
            file.write("%s: %s\n" % (venue_type, venue_name))
    except Exception as e:
        raise RuntimeError('Unable to add local venue to file "%s".' %
                           filename) from e
コード例 #15
0
def _rst_mark_author(author, text):
    """Wrap the text in underscores if the author is a marked author.

    An author is marked when "mark_author" is configured and contains the
    stripped author name; otherwise the text is returned unchanged.
    """
    if not config.has_key("mark_author"):
        return text
    if author.strip() not in config.get("mark_author"):
        return text
    return "_%s_" % text
コード例 #16
0
def _xml_mark_author(author, text):
    """Wrap the text in an <author> element that records whether it is marked.

    An author is marked when "mark_author" is configured and contains the
    stripped author name.
    """
    is_marked = config.has_key("mark_author") and author.strip() in config.get(
        "mark_author")
    if is_marked:
        template = '<author marked="True">%s</author>'
    else:
        template = '<author marked="False">%s</author>'
    return template % text
コード例 #17
0
def save_invalid_papers(papers):
    """Save the invalid papers among the given papers to a timestamped file.

    Papers for which ``is_valid`` is false are written in pub format to a
    file named "<invalid_filename_prefix>-<YYYYmmdd-HH:MM:SS>.pub", where
    the prefix is the "invalid_filename_prefix" configuration value.
    Nothing is written when every paper is valid.

    Raises RuntimeError when the file cannot be written.
    """

    # Extract invalid papers
    invalid_papers = [paper for paper in papers if not is_valid(paper)]

    # Don't save if there are no invalid papers
    if not invalid_papers:
        return

    # Generate filename. Note %M (minutes), not %m (month), in the time part.
    date = time.strftime("%Y%m%d-%H:%M:%S")
    invalid_filename = config.get(
        "invalid_filename_prefix") + "-" + date + ".pub"

    # Write to file; the context manager closes it even if the write fails
    text = pub.write(invalid_papers)
    print('Saving invalid papers to "%s".' % invalid_filename)
    try:
        with open(invalid_filename, "w") as file:
            file.write(text)
    except Exception as e:
        raise RuntimeError('Unable to save invalid papers to file "%s"' %
                           invalid_filename) from e
コード例 #18
0
def save_database(merged_papers):
    """Save the papers to the database file, making a backup copy if needed.

    The papers are formatted with ``pub.write`` and written to the file
    named by the "database_filename" configuration. If that file already
    exists and its size differs from the length of the new text, it is
    first copied to "<database_filename>.bak".

    The text is written with the platform default encoding; if that
    encoding cannot represent it, the file is rewritten as UTF-8.

    Raises RuntimeError when the backup cannot be created or the UTF-8
    fallback fails.
    """

    database_filename = config.get("database_filename")

    # Generate text to be written to file
    text = pub.write(merged_papers)

    print("")

    # Make backup copy if needed (file size of generated file is different from the current)
    # TODO: Register if changes has been made and write backup file based on that
    #       (instead of just comparing file sizes)
    if os.path.isfile(database_filename
                      ) and len(text) != os.path.getsize(database_filename):
        backup_filename = database_filename + ".bak"
        print('Saving backup copy of database to file "%s"' % backup_filename)
        try:
            shutil.copyfile(database_filename, backup_filename)
        except Exception as e:
            raise RuntimeError("Unable to create backup copy of database") from e

    # Write the database; the context managers close the file on any outcome
    print('Saving database to file "%s"' % database_filename)
    try:
        with open(database_filename, "w") as file:
            file.write(text)
    except UnicodeEncodeError:
        # A text-mode file rejects bytes, so reopen with an explicit
        # encoding instead of writing text.encode('utf-8').
        try:
            with open(database_filename, "w", encoding="utf-8") as file:
                file.write(text)
        except Exception as e:
            raise RuntimeError('Unable to save database to file "%s"\n%s' %
                               (database_filename, str(e))) from e
コード例 #19
0
ファイル: bibtex.py プロジェクト: pjhaest/publish
def write(papers):
    """Format the given list of papers in the BibTeX format.

    Each paper becomes one "@entrytype{key, ...}" entry, with the entry
    type looked up from the paper's category in the "category2entrytype"
    configuration. A paper without a "key" attribute gets the key
    "paper<index>". Entries are separated by one blank line.

    The "entrytype" and "key" attributes are not written as fields, the
    "sortkey" attribute is written as the BibTeX field "key", and the
    "author" and "editor" sequences are joined with " and ".

    Returns the complete BibTeX text as a string.
    """

    entries = []

    for (i, paper) in enumerate(papers):
        entry_type = config.get("category2entrytype")[paper["category"]]
        key = paper["key"] if "key" in paper else "paper%d" % i

        lines = ["@%s{%s,\n" % (entry_type, key)]
        for attribute in ordered_attributes(paper, _ignores):
            if attribute in ("entrytype", "key"):
                continue

            if attribute in ("author", "editor"):
                value = " and ".join(paper[attribute])
            else:
                value = str(paper[attribute])

            # sortkey becomes key in BibTeX. Rename only after the value
            # has been read, otherwise the citation key would be written.
            field = "key" if attribute == "sortkey" else attribute
            lines.append("  %s = {%s},\n" % (field, value))
        lines.append("}\n")

        entries.append("".join(lines))

    # Join by position so an earlier paper that compares equal to the last
    # one still gets its separating blank line.
    return "\n".join(entries)
コード例 #20
0
ファイル: validation.py プロジェクト: pjhaest/publish
def validate_file(filename=None):
    """Validate the papers in a file and save the result to the database.

    filename -- file to read; None when no file was specified

    Runs the steps in order: read, validate, generate keys, check for PDF
    files, process duplicates, print summary, then save the database and
    the invalid papers. Returns early, without doing anything, when no
    filename is given and the configured database file does not exist.
    """

    # Use default database if file is not specified
    # NOTE(review): filename is passed on unchanged (possibly None) below;
    # presumably read_database falls back to the default database - confirm.
    if filename is None and not os.path.isfile(
            config.get("database_filename")):
        print("No file specified and no database found, nothing to do.")
        return

    # Open and read database
    papers = read_database(filename)

    # Validate papers
    (database_papers, invalid_papers) = validate_papers(papers)

    # Generate keys
    # NOTE(review): this passes the full `papers` list and overwrites the
    # `database_papers` returned by validate_papers above - confirm that
    # this is intended.
    database_papers = generate_keys(papers)

    # Check for PDF files
    (num_found, num_missing) = check_pdf_files(papers)

    # Checking for duplicates
    database_papers = process_duplicates(database_papers)

    # Print summary (based on all papers read, not on database_papers)
    print_summary(papers, num_found, num_missing)

    # Save papers to database
    save_database(database_papers)
    save_invalid_papers(invalid_papers)
コード例 #21
0
def ask_user_alternatives(question, alternatives):
    """Ask the user to pick one of the alternatives; return its 0-based index.

    The question and the numbered alternatives are printed and the user is
    prompted until a legal number is entered. An empty answer selects the
    first alternative. When "autofix" is configured the first alternative
    is chosen without prompting.
    """
    count = len(alternatives)

    while True:
        print(question)
        for number, label in enumerate(alternatives, start=1):
            print("  [%d] %s" % (number, label))
        numbers = ", ".join(str(k) for k in range(1, count)) + " or " + str(count)

        if config.get("autofix"):
            print("  Autofix enabled, choosing default (1).")
            return 0

        answer = input("Please enter %s (or press return to choose [1]): " %
                       numbers).strip()
        if answer == "":
            choice = 1
        else:
            try:
                choice = int(answer)
            except ValueError:
                # Not a number: use a value that is never a legal option
                choice = -1

        if 1 <= choice <= count:
            return choice - 1

        print("Illegal option.")
コード例 #22
0
ファイル: validation.py プロジェクト: pjhaest/publish
def _validate_paper_venue(paper):
    """Validate that the venue (journal, conference etc) of the paper is known.

    The venue attribute for the paper's category is looked up in the
    "category_venues" configuration; categories mapped to None are not
    checked. For an unknown venue the user may add it to the known venues,
    replace it with a suggested venue (when one is found), or skip the
    paper, which sets ``paper["invalid"] = True``.
    """

    # Get venue type ("journal", "booktitle", etc)
    category = paper["category"]
    category_venues = config.get("category_venues")
    venue_type = category_venues[category]

    # Nothing to check for categories without a venue
    if venue_type is None:
        return

    known_venues = config.get(venue_type + "s")
    venue_name = paper[venue_type]
    if venue_name in known_venues:
        return

    print("")
    print('  Unknown %s: "%s"' % (venue_type, venue_name))
    suggestion = _suggest_venue(venue_name, known_venues)

    # No suggestion: the only choice is to add the venue or skip the paper
    if suggestion is None:
        wants_add = ask_user_yesno(
            '  Would you like to add %s "%s"?' % (venue_type, venue_name), "no")
        if wants_add:
            _add_venue(venue_type, venue_name)
        else:
            print("  Skipping paper.")
            paper["invalid"] = True
        return

    # A suggestion exists: replace, add, or skip
    print('  Suggested %s: "%s"' % (venue_type, suggestion))
    options = ("Replace %s." % venue_type, "Add %s." % venue_type,
               "Skip paper.")
    choice = ask_user_alternatives(
        "  Unknown %s, what should I do?" % venue_type, options)
    print("")

    if choice == 0:
        paper[venue_type] = suggestion
    elif choice == 1:
        _add_venue(venue_type, venue_name)
    elif choice == 2:
        print("  Skipping paper (unable to guess the right %s)" %
              venue_type)
        input("  Press return to continue.")
        paper["invalid"] = True
    else:
        raise RuntimeError("Unknown option.")
コード例 #23
0
ファイル: formatting.py プロジェクト: pjhaest/publish
def _html_mark_author(author, text):
    """Wrap the text in <strong> if the author is a marked author.

    An author is marked when the stripped name is contained in the
    "mark_author" configuration; otherwise the text is returned unchanged.
    """
    is_marked = author.strip() in config.get("mark_author")
    return "<strong>%s</strong>" % text if is_marked else text
コード例 #24
0
ファイル: formatting.py プロジェクト: pjhaest/publish
def _latex_mark_author(author, text):
    """Wrap the text in \\textbf{} if the author is a marked author.

    An author is marked when the stripped name is contained in the
    "mark_author" configuration; otherwise the text is returned unchanged.
    """
    is_marked = author.strip() in config.get("mark_author")
    return "\\textbf{%s}" % text if is_marked else text
コード例 #25
0
ファイル: formatting.py プロジェクト: pjhaest/publish
def rst_format_theses(paper):
    """Return the reST string for a thesis.

    Emits authors, title, the display string for the thesis type (looked
    up in the "thesistype_strings" configuration), school and year.
    """
    return _rst_join([
        _rst_get_authors_string(paper),
        _rst_format_title(paper),
        config.get("thesistype_strings")[paper["thesistype"]],
        paper["school"],
        paper["year"],
    ])
コード例 #26
0
ファイル: formatting.py プロジェクト: pjhaest/publish
def html_format_reports(paper):
    """Return the HTML string for a report.

    Emits title, authors, institution and year, in that order.
    """
    title_html = _html_format_title(paper)
    authors_html = _html_get_authors_string(paper["author"])
    institution = paper["institution"]
    year_html = '<span class="%s_item_year">%s</span>' % (
        config.get("html_class_prefix"), paper["year"])

    return _html_join([title_html, authors_html, institution, year_html])
コード例 #27
0
ファイル: pub.py プロジェクト: pjhaest/publish
def write(papers):
    """Format the given list of papers in the pub format.

    Papers are grouped by category, each group introduced by a
    "* <category>" line and each paper by a "** <title>" line ("missing"
    when the paper has no title), followed by the output of
    ``write_paper``. The categories are the "categories" configuration
    when "use_standard_categories" is set, otherwise the sorted categories
    found in the papers. Categories without papers are left out.
    """
    if config.get("use_standard_categories"):
        categories = config.get("categories")
    else:
        categories = sorted({paper["category"] for paper in papers})

    chunks = []
    for category in categories:

        # Skip categories that have no papers
        members = [paper for paper in papers if paper["category"] == category]
        if not members:
            continue

        chunks.append("* %s\n" % category)

        for paper in members:

            # Title line
            title = paper["title"] if "title" in paper else "missing"
            try:
                heading = "** %s\n" % title
            except UnicodeDecodeError as e:
                heading = "** %s\n" % title.decode('utf-8')
            chunks.append(heading)

            # Remaining attributes
            chunks.append(write_paper(paper, ["category", "title"] + _ignores))

    return "".join(chunks)
コード例 #28
0
ファイル: exporting.py プロジェクト: pjhaest/publish
def export_file(filename, filters=None):
    """Export the database into the file format given by the filename suffix.

    filename -- output file; its suffix selects the format (bib/bibtex,
                pub, tex, pdf, html, rst or graphml)
    filters  -- filters passed to ``filter_papers``; defaults to no filters

    The text is written with the platform default encoding; if that
    encoding cannot represent it, the file is rewritten as UTF-8.

    Raises RuntimeError when the filename is the default database, the
    suffix is not a known format, or writing the text fails.
    """

    # A fresh list per call avoids sharing one mutable default between calls
    if filters is None:
        filters = []

    # Make sure we don't overwrite the database
    database_filename = config.get("database_filename")
    if filename == database_filename:
        raise RuntimeError('Papers cannot be exported to the default database ("%s").' % database_filename)

    # Read database
    database_papers = read_database(database_filename)

    # Why should the database be validated on export?
    #(valid_papers, invalid_papers) = validate_papers(database_papers)

    # Filter papers
    filtered_papers = filter_papers(database_papers, filters)

    # Get the filename suffix
    suffix = filename.split(".")[-1]

    # Choose format based on suffix
    if suffix in ("bib", "bibtex"):
        write = bibtex.write
    elif suffix == "pub":
        write = pub.write
    elif suffix == "tex":
        write = latex.write
    elif suffix == "pdf":
        write = pdf.write
    elif suffix == "html":
        write = html.write
    elif suffix == "rst":
        write = rst.write
    elif suffix == "graphml":
        write = graphml.write
    else:
        raise RuntimeError("Unknown file format.")

    # Generate the text and write it; the file is closed on any outcome
    text = write(filtered_papers)
    needs_utf8 = False
    with open(filename, "w") as file:
        try:
            file.write(text)
        except UnicodeEncodeError:
            needs_utf8 = True
        except Exception as e:
            raise RuntimeError('Unable to write file "%s" (exception %s: %s)'
                               % (filename, type(e), e)) from e

    if needs_utf8:
        # A text-mode file rejects bytes, so reopen with an explicit
        # encoding instead of writing text.encode('utf-8').
        with open(filename, "w", encoding="utf-8") as file:
            file.write(text)

    # Print summary
    print_summary(filtered_papers)
    print("")
    print("Exported %d paper(s) to %s." % (len(filtered_papers), filename))
0
ファイル: formatting.py プロジェクト: pjhaest/publish
def html_format_misc(paper):
    """Return the HTML string for a miscellaneous item.

    Title and authors are always emitted; every other field is added only
    when the paper has it. When "howpublished" contains an "http://" URL
    and the title is not already a link, the title is turned into a link
    to that URL instead of printing "howpublished" itself.
    """
    title_html = _html_format_title(paper)
    authors_html = _html_get_authors_string(paper["author"])
    extras = []

    if "howpublished" in paper:
        howpublished = paper["howpublished"]
        if "http://" in howpublished and "<a href" not in title_html:
            # Use everything from the last "http://" onwards as the link
            link = ("http://" + howpublished.split("http://")[-1]).strip()
            title_html = '<a href="%s">%s</a>' % (link, title_html)
        else:
            extras.append(howpublished)

    if "booktitle" in paper:
        extras.append("in <i>%s</i>" % paper["booktitle"])
    if "meeting" in paper:
        extras.append(paper["meeting"])
    if "thesistype" in paper:
        extras.append(config.get("thesistype_strings")[paper["thesistype"]])
    if "school" in paper:
        extras.append(paper["school"])
    if "chapter" in paper:
        extras.append("chapter %s" % paper["chapter"])
    if "volume" in paper:
        extras.append("vol. %s" % paper["volume"])
    if "pages" in paper:
        extras.append("pp. %s" % _html_format_pages(paper["pages"]))
    if "year" in paper:
        extras.append('<span class="%s_item_year">%s</span>' %
                      (config.get("html_class_prefix"), paper["year"]))

    return _html_join([title_html, authors_html] + extras)
コード例 #30
0
ファイル: formatting.py プロジェクト: pjhaest/publish
def _html_get_authors_string(authors):
    """Convert a sequence of authors to an HTML author string.

    Each author is shortened with ``short_author`` and passed through
    ``_html_mark_author``. A single author is used as is; otherwise the
    names are comma-separated with " and " before the last one, or end in
    " et al." when the last entry is "others".
    """
    names = [_html_mark_author(author, short_author(author).strip())
             for author in authors]

    if len(names) == 1:
        joined = names[0]
    elif names[-1] == "others":
        joined = ", ".join(names[:-1]) + " et al."
    else:
        joined = ", ".join(names[:-1]) + " and " + names[-1]

    return '<span class="%s_item_authors">%s</span>' % (
        config.get("html_class_prefix"), joined)
コード例 #31
0
ファイル: publish_doconce.py プロジェクト: ischurov/doconce
def _xml_mark_author(author, text):
    """Wrap the text in an <author> element that records whether it is marked.

    An author is marked when "mark_author" is configured and contains the
    stripped author name.
    """
    if not config.has_key("mark_author"):
        return '<author marked="False">%s</author>' % text
    if author.strip() not in config.get("mark_author"):
        return '<author marked="False">%s</author>' % text
    return '<author marked="True">%s</author>' % text
コード例 #32
0
ファイル: publish_doconce.py プロジェクト: ischurov/doconce
def _rst_mark_author(author, text):
    """Wrap the text in underscores if the author is a marked author.

    An author is marked when "mark_author" is configured and contains the
    stripped author name; otherwise the text is returned unchanged.
    """
    is_marked = config.has_key("mark_author") and author.strip() in config.get("mark_author")
    return "_%s_" % text if is_marked else text