def test_implementation_constructor():
    """The implementation can be selected by string or by enum member.

    The same file is loaded once with ``implementation="wok"`` and once with
    ``rispy.RisImplementation.WOK``; both calls must yield equal entries.
    """
    wos_path = DATA_DIR / "example_wos.ris"

    # first pass: implementation chosen through the plain string
    with open(wos_path, "r") as handle:
        via_string = rispy.load(handle, implementation="wok")

    # second pass: implementation chosen through the enum member
    with open(wos_path, "r") as handle:
        via_enum = rispy.load(handle, implementation=rispy.RisImplementation.WOK)

    assert via_string == via_enum
def test_load_example_full_ris_without_whitespace():
    """Files without whitespace after the ER tag must still parse.

    Resolves https://github.com/MrTango/rispy/pull/25
    """
    ris_path = DATA_DIR / "example_full_without_whitespace.ris"

    first_record = {
        "type_of_reference": "JOUR",
        "id": "12345",
        "primary_title": "Title of reference",
        "first_authors": ["Marx, Karl", "Lindgren, Astrid"],
        "secondary_authors": ["Glattauer, Daniel"],
        "publication_year": "2014//",
        "notes_abstract": "BACKGROUND: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.",  # noqa: E501
        "keywords": ["Pippi", "Nordwind", "Piraten"],
        "alternate_title3": "Lorem",
        "alternate_title2": "lorem",
        "volume": "9",
        "number": "3",
        "start_page": "e0815",
        "place_published": "United States",
        "publisher": "Fun Factory",
        "issn": "1932-6208",
        "note": "1008150341",
        "file_attachments2": "http://example.com",
        "url": "http://example_url.com",
    }

    second_record = {
        "type_of_reference": "JOUR",
        "id": "12345",
        "primary_title": "The title of the reference",
        "first_authors": ["Marxus, Karlus", "Lindgren, Astrid"],
        "secondary_authors": ["Glattauer, Daniel"],
        "publication_year": "2006//",
        "notes_abstract": "BACKGROUND: Lorem dammed ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.",  # noqa: E501
        "keywords": ["Pippi Langstrumpf", "Nordwind", "Piraten"],
        "alternate_title3": "Lorem",
        "alternate_title2": "lorem",
        "volume": "6",
        "number": "3",
        "start_page": "e0815341",
        "place_published": "Germany",
        "publisher": "Dark Factory",
        "issn": "1732-4208",
        "note": "1228150341",
        "file_attachments2": "http://example2.com",
        "url": "http://example_url.com",
    }

    expected_records = [first_record, second_record]

    with open(ris_path, "r") as handle:
        loaded = rispy.load(handle)

    assert expected_records == loaded
def test_list_tag_enforcement():
    """Load with ``enforce_list_tags=False`` and an empty ``list_tags``.

    The first loaded entry must equal the expected dict, in which both
    ``authors`` and ``issn`` are lists.
    """
    ris_path = DATA_DIR / "example_custom_list_tags.ris"

    loaded = rispy.load(ris_path, enforce_list_tags=False, list_tags=[])
    first_entry = loaded[0]

    expected_entry = {
        "type_of_reference": "JOUR",
        "authors": ["Marx, Karl", "Marxus, Karlus"],
        "issn": ["12345", "ABCDEFG", "666666"],
    }
    assert expected_entry == first_entry
def test_strip_bom():
    """The first entry of the BOM example file must match the expected dict."""
    bom_path = DATA_DIR / "example_bom.ris"
    expected_entry = {
        "type_of_reference": "JOUR",
        "doi": "10.1186/s40981-020-0316-0",
    }

    # we properly decode the content of this file as UTF-8, but leave the BOM
    with open(bom_path, "r", encoding="utf-8") as handle:
        loaded = rispy.load(handle)

    print(loaded)
    first_entry = loaded[0]
    assert expected_entry == first_entry
def test_wos_ris():
    """Load the WOS example with ``rispy.WokParser`` and check both titles."""
    wos_path = DATA_DIR / "example_wos.ris"

    with open(wos_path, "r") as handle:
        loaded = rispy.load(handle, implementation=rispy.WokParser)

    assert len(loaded) == 2

    # expected document titles, in the order the entries are loaded
    expected_titles = [
        "Interactions stabilizing the structure of the core light-harvesting complex (LHl) of photosynthetic bacteria and its subunit (B820)",  # noqa: E501
        "Proximal and distal influences on ligand binding kinetics in microperoxidase and heme model compounds",  # noqa: E501
    ]
    for index, title in enumerate(expected_titles):
        assert loaded[index]["document_title"] == title
def test_file_implementation_write():
    """Round-trip entries through a custom parser and a custom writer.

    The entries loaded from ``example_full.ris`` are dumped to
    ``example_full_write.ris`` and loaded again; the reloaded entries must
    equal the ones loaded first.
    """

    class CustomParser(rispy.RisParser):
        DEFAULT_IGNORE = ["JF", "ID", "KW"]

    class CustomWriter(rispy.RisWriter):
        DEFAULT_IGNORE = ["JF", "ID", "KW"]

    custom_list_tags = ["SN", "T1", "A1", "UR"]
    source_path = DATA_DIR / "example_full.ris"
    target_path = DATA_DIR / "example_full_write.ris"

    # initial load with the custom parser
    with open(source_path, "r") as handle:
        original = rispy.load(
            handle, implementation=CustomParser, list_tags=custom_list_tags
        )

    # write the entries back out with the custom writer
    with open(target_path, "w") as handle:
        rispy.dump(
            original, handle, implementation=CustomWriter, list_tags=custom_list_tags
        )

    # read the written file again with the same parser settings
    with open(target_path, "r") as handle:
        round_tripped = rispy.load(
            handle, implementation=CustomParser, list_tags=custom_list_tags
        )

    assert round_tripped == original
def test_load_example_basic_ris():
    """The basic example loads identically from a file object and from a Path."""
    ris_path = DATA_DIR / "example_basic.ris"
    expected_entry = {
        "type_of_reference": "JOUR",
        "authors": ["Shannon,Claude E."],
        "year": "1948/07//",
        "title": "A Mathematical Theory of Communication",
        "alternate_title3": "Bell System Technical Journal",
        "start_page": "379",
        "end_page": "423",
        "volume": "27",
    }

    # test with file object
    with open(ris_path, "r") as handle:
        from_handle = rispy.load(handle)
    assert expected_entry == from_handle[0]

    # test with pathlib object
    as_path = Path(ris_path)
    from_path = rispy.load(as_path)
    assert expected_entry == from_path[0]
def test_unkown_skip():
    """Load with ``skip_unknown_tags=True`` and compare the first entry.

    The expected dict contains only known keys (no ``unknown_tag`` entry).
    """
    ris_path = DATA_DIR / "example_multi_unknown_tags.ris"

    with open(ris_path, "r") as handle:
        loaded = rispy.load(handle, skip_unknown_tags=True)

    first_entry = loaded[0]
    expected_entry = {
        "type_of_reference": "JOUR",
        "authors": ["Shannon,Claude E."],
        "year": "1948/07//",
        "title": "A Mathematical Theory of Communication",
        "alternate_title3": "Bell System Technical Journal",
        "end_page": "423",
        "volume": "27",
    }
    assert expected_entry == first_entry
def read_ris(fp):
    """RIS file reader.

    The file is decoded by trying a fixed list of encodings in order; the
    first encoding that decodes the whole file without error is used.

    Parameters
    ----------
    fp: str, pathlib.Path
        File path to the RIS file.

    Returns
    -------
    pandas.DataFrame:
        Dataframe with entries.

    Raises
    ------
    ValueError
        If no entries could be read with any of the attempted encodings.
        This is also raised when the file could not be opened at all; the
        underlying I/O error is logged as a warning in that case.
    """
    # Order matters: ISO-8859-1 maps every possible byte to a character and
    # therefore never raises UnicodeDecodeError, so it must be the *last*
    # resort. 'utf-8-sig' accepts UTF-8 both with and without a BOM.
    encodings = ['utf-8-sig', 'utf-8', 'ISO-8859-1']

    # The tag mapping does not depend on the encoding; build it once.
    mapping = _tag_key_mapping(reverse=False)

    entries = None
    for encoding in encodings:
        try:
            with open(fp, 'r', encoding=encoding) as bibliography_file:
                # list() forces the full read inside the try, so decoding
                # errors surface here and not later
                entries = list(rispy.load(bibliography_file, mapping=mapping))
            break
        except UnicodeDecodeError:
            # wrong guess; try the next candidate encoding
            pass
        except IOError as e:
            logging.warning(e)

    if entries is None:
        raise ValueError("Cannot find proper encoding for data file.")

    df = pd.DataFrame(entries)

    def converter(x):
        # join list-like cells into one string; values that cannot be
        # joined (TypeError) become the empty string
        try:
            return ", ".join(x)
        except TypeError:
            return ""

    # flatten every list-type column that is present in the dataframe
    for tag in LIST_TYPE_TAGS:
        key = TAG_KEY_MAPPING[tag]
        if key in df:
            df[key] = df[key].apply(converter)

    return standardize_dataframe(df)
def test_load_multiline_ris():
    """The first entry of the multiline example must match the expected dict.

    In the expected dict ``notes_abstract`` is a single string and ``notes``
    is a list with one item per line.
    """
    ris_path = DATA_DIR / "multiline.ris"

    with open(ris_path, "r") as handle:
        loaded = rispy.load(handle)

    first_entry = loaded[0]
    expected_entry = {
        "type_of_reference": "JOUR",
        "authors": ["Shannon,Claude E."],
        "year": "1948/07//",
        "title": "A Mathematical Theory of Communication",
        "alternate_title3": "Bell System Technical Journal",
        "start_page": "379",
        "end_page": "423",
        "notes_abstract": "first line, then second line and at the end the last line",
        "notes": ["first line", "* second line", "* last line"],
        "volume": "27",
    }
    assert expected_entry == first_entry
def test_load_single_unknown_tag_ris():
    """Default loading of a file with one unknown tag.

    The expected first entry carries the unknown ``JP`` values under the
    ``unknown_tag`` key.
    """
    ris_path = DATA_DIR / "example_single_unknown_tag.ris"

    with open(ris_path, "r") as handle:
        loaded = rispy.load(handle)

    first_entry = loaded[0]
    expected_entry = {
        "type_of_reference": "JOUR",
        "authors": ["Shannon,Claude E."],
        "year": "1948/07//",
        "title": "A Mathematical Theory of Communication",
        "alternate_title3": "Bell System Technical Journal",
        "start_page": "379",
        "end_page": "423",
        "volume": "27",
        "unknown_tag": {"JP": ["CRISPR", "Direct Current"]},
    }
    assert expected_entry == first_entry
def create(self, request):
    """Create ``Article`` objects from an uploaded RIS file.

    The upload is taken from ``request.FILES['file']``, parsed with
    ``rispy.load``, and every parsed entry is passed to
    ``Article.objects.create`` as keyword arguments. When an article with
    the entry's ``id`` already exists, the ``id`` is dropped from the entry
    before creation.

    Parameters
    ----------
    request:
        Incoming request; must carry the RIS upload under ``FILES['file']``.

    Returns
    -------
    Response
        A response whose payload is a success message.
    """
    import io
    import tempfile

    # read the uploaded file from the request
    upload = request.FILES['file']

    # Spool the upload into an anonymous, per-request temporary file. A fixed
    # shared path would let concurrent uploads overwrite each other and would
    # leave the file behind after the request.
    with tempfile.TemporaryFile(mode='w+b') as spool:
        for chunk in upload.chunks():
            spool.write(chunk)
        spool.seek(0)

        # Text view over the spooled bytes; like a plain open(path, 'r') it
        # uses the platform default encoding.
        with io.TextIOWrapper(spool) as bibliography_file:
            # iterate while the file is still open, so this also works if
            # rispy.load yields entries lazily
            for entry in rispy.load(bibliography_file):
                entry_id = entry.get('id')
                # entries without an id are created as they are; .exists()
                # asks the database for presence only
                if entry_id is not None and Article.objects.filter(id=entry_id).exists():
                    del entry['id']
                # create the object from the fields mapped by rispy
                Article.objects.create(**entry)

    # return the success message
    return Response({'message': 'Articles criados com sucesso!!'})
def main():
    """Extract the RIS records whose accession number is listed in an id file.

    Command-line arguments (all required):

    * ``-i/--input_file``: text file with one accession number per line.
    * ``-d/--data_file``: RIS file to search.
    * ``-o/--output_file``: RIS file the matching records are written to.

    Exit status: invalid arguments exit with status 2 (argparse's own
    behaviour); an unreadable input file or an OS error on the data or
    output file prints a message and exits with status 1. Any other error
    is reported and re-raised.
    """
    # Get input arguments
    parser = argparse.ArgumentParser(
        description=
        'Given a list of accession numbers in a file, extract records that match those ids'
    )
    parser.add_argument(
        "-i",
        "--input_file",
        type=str,
        help=
        "The path to the file that contains the accession numbers to search for",
        required=True)
    parser.add_argument(
        "-d",
        "--data_file",
        type=str,
        help="The path to the data file. Should be in .ris format",
        required=True)
    parser.add_argument(
        "-o",
        "--output_file",
        type=str,
        help=
        "The path to the output file for records taht match. Will be in .ris format",
        required=True)

    # argparse reports bad arguments itself (usage + message, exit status 2)
    # and handles -h; wrapping it in a bare except would swallow that
    # SystemExit and turn a usage error into a "successful" exit.
    args = parser.parse_args()
    input_file = args.input_file
    data_file = args.data_file
    output_path = args.output_file

    # Get the accession ids from the input file. A set gives constant-time
    # membership tests; blank lines are skipped so that an empty string is
    # never treated as an id.
    try:
        with open(input_file) as id_file:
            stripped_lines = (line.strip() for line in id_file)
            accession_ids = {value for value in stripped_lines if value}
    except (OSError, UnicodeError):
        print(f'Unable to open input file {input_file}')
        sys.exit(1)

    # Iterate over the data file and keep every record whose accession number
    # is one of the requested ids. Records without an accession number simply
    # do not match.
    try:
        with open(data_file, 'r') as bibliography_file:
            selected_records = [
                entry for entry in rispy.load(bibliography_file)
                if entry.get('accession_number') in accession_ids
            ]
    except OSError as err:
        print("OS error: {0}".format(err))
        sys.exit(1)
    except Exception:
        print("Unexpected error:", sys.exc_info()[0])
        raise

    # This is pretty inefficient. We are storing the selected records to a list
    # and this list could get huge!
    try:
        with open(output_path, 'w') as output_handle:
            rispy.dump(selected_records, output_handle)
    except OSError as err:
        print("OS error: {0}".format(err))
        sys.exit(1)
    except Exception:
        print("Unexpected error:", sys.exc_info()[0])
        raise
def test_starting_newline():
    """The starting-newlines example file must load as exactly one entry."""
    ris_path = DATA_DIR / "example_starting_newlines.ris"

    with open(ris_path, "r") as handle:
        loaded = rispy.load(handle)

    entry_count = len(loaded)
    assert entry_count == 1
from pprint import pprint import rispy # define file path for local RIS files filepath = 'C:\\Users\\mobarget\\Google Drive\\RIS_export_PolishNationalLibrary' # define filepath as directory containing iterable files and read each file titles = [] for f in os.listdir(filepath): print(f) # return file names, e.g. Primo_RIS_Export.ris f_path = os.path.join(filepath, f) with open(f_path, 'r', encoding="utf-8") as bibliography_file: print(bibliography_file) entries = rispy.load(bibliography_file) # entries are called based on standard RIS format # for files deviating from this format, you may need to use a tag-key-mapper # check rispy documentation for further details for entry in entries: title = entry['primary_title'] print( title ) # special characters are shown correctly in Jupyter notebook titles.append(title) outpath = 'C:\\Users\\mobarget\\Google Drive\\' out = open( os.path.join(outpath, 'ris_out.csv'), 'w', encoding="utf_8_sig"
outpath = 'C:\\XXXXX' # define keyword and output lists keyword = "yourstring" titles = [] wrong_ids = [] # define filepath as directory containing iterable files and read each file for f in os.listdir(filepath): #print(f) # return file names, e.g. Primo_RIS_Export.ris f_path = os.path.join(filepath, f) with open(f_path, 'r', encoding="latin-1") as bibliography_file: #print(bibliography_file) # returns RIS meta-information for file try: data = rispy.load( bibliography_file, strict=False) # accepts non-standard RIS if "strict=False" finddata(data) # get data via function except: OSError continue # entries are called based on standard RIS format # for files deviating from this format, you may need to use a tag-key-mapper # check rispy documentation for further details def finddata(x): try: title = x[0][ 'primary_title'] # get first and only item from list and dictionary data by key