def mergeResultData(result1, result2):
    """
    Merges bibtex and extra_data dictionaries for a SearchResult and/or a Paper

    :param result1:
    :param result2:
    :return:
    """
    # if there's no year we should update the ID after getting the year
    to_update_id = not result1.bib.get('year') or 'ID' not in result1.bib

    for field in BIB_FIELDS_TRANSFER:
        if len(str(result2.bib.get(field, ''))) > len(
                str(result1.bib.get(field, ''))):
            result1.bib[field] = str(result2.bib[field])

    for field in ['ID', 'ENTRYTYPE']:
        if field in result2.bib:
            result1.bib[field] = str(result2.bib[field])

    if 'ID' not in result2.bib and to_update_id:
        if 'ID' in result1.bib:
            del result1.bib['ID']
        fixBibData(result1.bib, 1)

    for field in result2.extra_data:
        if field not in result1.extra_data:
            result1.extra_data[field] = result2.extra_data[field]

    if 'urls' in result2.extra_data:
        for url in result2.extra_data['urls']:
            addUrlIfNew(result1, url['url'], url['type'], url['source'])

    refreshDOIfromURLs(result1)
    return result1
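
# Illustrative sketch (not part of the original flow): merging a Crossref result
# into an arXiv result for the same paper. The bib values are invented, and
# SearchResult is assumed to take (index, bib, source, extra_data), as in the
# search methods below; which bib fields get copied depends on BIB_FIELDS_TRANSFER.
#
#   arxiv_res = SearchResult(0, {'title': 'Some Paper', 'year': '2019'},
#                            'arxiv', {})
#   crossref_res = SearchResult(0, {'title': 'Some Paper: A Longer Subtitle'},
#                               'crossref', {'language': 'en'})
#   merged = mergeResultData(arxiv_res, crossref_res)
#   # merged is arxiv_res, now also carrying crossref_res's extra_data fields,
#   # any longer bib values, and any new URLs, with DOIs refreshed from URLs.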
    def getMetadata(self, paper, identity):
        if not paper.doi:
            raise ValueError("Paper has no DOI")

        url = 'https://api.unpaywall.org/v2/%s?email=%s' % (paper.doi,
                                                            identity)

        r = self.request(url)

        data = r.json()
        if data.get('error'):
            return

        top_url = data.get('best_oa_location')
        if not top_url:
            return

        if top_url.get('url_for_pdf'):
            addUrlIfNew(paper, top_url['url_for_pdf'], 'pdf', 'unpaywall')
        if top_url.get('url_for_landing_page'):
            addUrlIfNew(paper, top_url['url_for_landing_page'], 'main',
                        'unpaywall')
        if top_url.get('url'):
            url = top_url['url']
            url_type = 'pdf' if isPDFURL(url) else 'main'
            addUrlIfNew(paper, url, url_type, 'unpaywall')

        paper.extra_data['done_unpaywall'] = True
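
    # For reference, a trimmed sketch of the Unpaywall JSON shape this method
    # reads (field names as used above; values invented):
    #
    #   {
    #       "best_oa_location": {
    #           "url": "https://example.org/paper",
    #           "url_for_pdf": "https://example.org/paper.pdf",
    #           "url_for_landing_page": "https://example.org/paper"
    #       }
    #   }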
    def search(self,
               title,
               identity,
               max_results=5,
               min_year=None,
               max_year=None):
        url = 'https://www.semanticscholar.org/api/1/search'

        yearFilter = None

        if min_year or max_year:
            yearFilter = {}
            if not max_year:
                now = datetime.datetime.now()
                max_year = now.year

            if min_year:
                yearFilter['min'] = int(min_year)
            if max_year:
                yearFilter['max'] = int(max_year)

        results_left = max_results
        page_num = 1

        return_results = []

        while results_left > 0:
            data = {
                "queryString": title,
                "page": page_num,
                "pageSize": 10,
                "sort": "relevance",
                "authors": [],
                "coAuthors": [],
                "venues": [],
                "yearFilter": yearFilter,
                "requireViewablePdf": False,
                "publicationTypes": [],
                "externalContentTypes": []
            }

            r = self.request(url, data=data, post=True)

            results_dict = r.json()

            total_results = results_dict.get('totalResults')
            if total_results and total_results < max_results:
                # fewer results available than requested: lower the target
                max_results = total_results
                results_left = max_results - len(return_results)

            results = results_dict.get('results', [])
            if not results:
                # no results on this page; stop rather than loop forever
                break

            # only keep as many results as are still needed
            to_take = results[:results_left]
            results_left -= len(results)

            for index, res in enumerate(to_take):

                res_title = res['title']['text']

                authors_processed = []
                for author_list in res['authors']:
                    for author_dict in author_list:
                        if 'name' in author_dict:
                            authors_processed.append(author_dict)

                authors = self.loadSSAuthors(authors_processed)

                bib = {
                    'title':
                    res_title,
                    'abstract':
                    res['paperAbstract']['text'],
                    'year':
                    res['year']['text'],
                    'url':
                    'https://www.semanticscholar.org/paper/{}/{}'.format(
                        res['slug'], res['id']),
                    'author':
                    authorListFromDict(authors),
                }

                if res.get('doiInfo'):
                    bib['doi'] = res['doiInfo'].get('doi')

                extra_data = {'ss_id': res['id'], 'x_authors': authors}

                new_res = SearchResult(index, bib, 'semanticscholar',
                                       extra_data)

                for link in res.get('links', []):
                    if isPDFURL(link['url']):
                        bib['eprint'] = link['url']
                        addUrlIfNew(new_res, link['url'], 'pdf',
                                    'semanticscholar')

                venue = res['venue'].get('text')
                extra_data['venue'] = venue
                return_results.append(new_res)

            # move on to the next page of results
            page_num += 1

        return return_results
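
    # Illustrative usage (hypothetical names): assuming this method lives on a
    # searcher class exposing the request() helper used above, a call such as
    #
    #   searcher.search('neural text summarisation', identity='me@example.com',
    #                   max_results=15, min_year=2015)
    #
    # POSTs the payload above to the endpoint one 10-result page at a time
    # until 15 results are collected or no further results are returned.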
    def search(self, title, identity, max_results=5):
        url = 'http://export.arxiv.org/api/query?search_query=title:{}&start=0&max_results={}'.format(
            urllib.parse.quote(title), max_results)
        r = self.request(url)

        text = BytesIO(r.content)
        tree = etree.parse(text)

        ns_map = {
            'ns': 'http://www.w3.org/2005/Atom',
            'arxiv': 'http://arxiv.org/schemas/atom'
        }

        results = []
        for index, entry in enumerate(
                tree.xpath('/ns:feed/ns:entry', namespaces=ns_map)):
            new_bib = {
                'arxivid':
                entry.xpath('ns:id', namespaces=ns_map)[0].text.split('/')[-1],
                'title':
                entry.xpath('ns:title', namespaces=ns_map)[0].text,
                'abstract':
                entry.xpath('ns:summary', namespaces=ns_map)[0].text,
            }

            published = entry.xpath('ns:published', namespaces=ns_map)[0].text
            match = re.search(r"(\d{4})-(\d{2})-(\d{2})", published)

            new_bib['year'] = match.group(1)
            new_bib['month'] = str(int(match.group(2)))
            new_bib['date'] = str(int(match.group(3)))

            authors = []
            for author in entry.xpath('ns:author', namespaces=ns_map):
                bits = author.xpath('ns:name',
                                    namespaces=ns_map)[0].text.split()
                authors.append({'given': bits[0], 'family': bits[-1]})

            new_bib['author'] = authorListFromDict(authors)
            new_extra = {
                'x_authors':
                authors,
                'ax_main_category':
                entry.xpath('arxiv:primary_category',
                            namespaces=ns_map)[0].get('term'),
            }

            categories = []
            for cat in entry.xpath('ns:category', namespaces=ns_map):
                categories.append(cat.get('term'))

            new_extra['ax_categories'] = categories

            new_res = SearchResult(index, new_bib, 'arxiv', new_extra)

            for link in entry.xpath('ns:link', namespaces=ns_map):
                if link.get('title') == 'pdf':
                    addUrlIfNew(new_res, link.get('href'), 'pdf', 'arxiv')
                elif 'arxiv.org/abs/' in link.get('href'):
                    addUrlIfNew(new_res, link.get('href'), 'main', 'arxiv')

            results.append(new_res)

        return results
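
    # For reference, the Atom fields read from each <entry> above (namespaces
    # as declared in ns_map; values invented):
    #
    #   <entry>
    #     <id>http://arxiv.org/abs/1234.56789v1</id>
    #     <title>...</title>
    #     <summary>...</summary>
    #     <published>2019-01-31T00:00:00Z</published>
    #     <author><name>Jane Doe</name></author>
    #     <arxiv:primary_category term="cs.CL"/>
    #     <category term="cs.CL"/>
    #     <link title="pdf" href="http://arxiv.org/pdf/1234.56789v1"/>
    #   </entry>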
    def search(self, title, identity, year=None, max_results=1):
        """
        Searchs and returns a number of results from Crossref

        :param title: article title
        :param identity: email address to provide to Crossref
        :param year: publication year
        :param max_results:
        :return: list of Crossref JSON data results
        """
        quoted_title = urllib.parse.quote(title, safe='')
        headers = {'User-Agent': 'ReviewBuilder(mailto:%s)' % identity}
        # changed because of https://status.crossref.org/incidents/4y45gj63jsp4
        url = 'https://api.crossref.org/works?rows={}&query.bibliographic={}'.format(
            max_results, quoted_title)
        if year:
            url += '&query.published=' + str(year)

        r = self.request(url, headers)

        d = r.json()
        if d.get('status') != 'ok':
            raise ValueError('Error in request: ' +
                             d.get('status', 'NO STATUS') + ' ' +
                             str(d.get('message', 'NO MESSAGE')))

        results = []
        for index, item in enumerate(d['message']['items']):
            # print(item.get('type'))
            new_bib = {
                'doi': item['DOI'],
                'title': basicTitleCleaning(removeListWrapper(item['title']))
            }

            if 'container-title' in item:
                # reference-entry, book

                if item.get('type') in ['journal-article', 'reference-entry']:
                    new_bib['journal'] = removeListWrapper(
                        item['container-title'])
                    new_bib['ENTRYTYPE'] = 'article'
                elif item.get('type') in ['book-chapter']:
                    new_bib['ENTRYTYPE'] = 'inbook'
                    new_bib['booktitle'] = removeListWrapper(
                        item['container-title'])
                elif item.get('type') in ['proceedings-article']:
                    new_bib['ENTRYTYPE'] = 'inproceedings'
                    new_bib['booktitle'] = removeListWrapper(
                        item['container-title'])

            if item.get('type') in ['book']:
                new_bib['ENTRYTYPE'] = 'book'

            if item.get('type') not in [
                    'journal-article', 'reference-entry', 'book',
                    'book-chapter', 'proceedings-article'
            ]:
                print(json.dumps(item, indent=3))

            for crossref_field, bib_field in [
                ('publisher-location', 'address'),
                ('publisher', 'publisher'),
                ('issue', 'issue'),
                ('volume', 'volume'),
                ('page', 'pages'),
            ]:
                if crossref_field in item:
                    new_bib[bib_field] = str(item[crossref_field])

            if 'URL' in item:
                new_bib['url'] = item['URL']

            if "issued" in item:
                date_parts = item['issued']['date-parts'][0]
                new_bib['year'] = str(date_parts[0])
                if len(date_parts) > 1:
                    new_bib['month'] = str(date_parts[1])
                if len(date_parts) > 2:
                    new_bib['day'] = str(date_parts[2])

            authors = []
            for author in item.get('author', []):
                authors.append({
                    'given': author.get('given', ''),
                    'family': author.get('family', '')
                })

            if item.get('author'):
                new_bib['author'] = authorListFromDict(authors)

            new_extra = {
                'x_authors': authors,
                'language': item.get('language')
            }

            new_res = SearchResult(index, new_bib, 'crossref', new_extra)

            if 'URL' in item:
                addUrlIfNew(new_res, item['URL'], 'main', 'crossref')

            if 'link' in item:
                for link in item['link']:
                    if isPDFURL(link['URL']):
                        addUrlIfNew(new_res, link['URL'], 'pdf', 'crossref')

            results.append(new_res)

        return results
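
    # Illustrative usage (hypothetical names): assuming this method lives on a
    # Crossref searcher class exposing the request() helper used above,
    #
    #   results = searcher.search('a survey of citation recommendation',
    #                             identity='me@example.com', year=2018,
    #                             max_results=3)
    #
    # returns a list of SearchResult objects whose bib dicts carry the DOI,
    # title, venue, volume/issue/pages and date fields mapped above.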