def trx_abstracts(m):
    """Look up an article's abstract on Mendeley using its DOI.

    Reads the "DOI" property from ``m``. When the document is found in the
    Mendeley catalog, a "me.Article" entity carrying the abstract is added to
    the output and the abstract is also shown as a UI message.

    Args:
        m: incoming transform request; this function calls
            ``m.getProperty(...)`` and reads ``m.Maltegoxml``.

    Returns:
        The value of ``TRX.returnOutput()``.
    """
    TRX = MaltegoTransform()
    doi = m.getProperty("DOI")
    if not doi:
        TRX.addUIMessage(
            'A DOI is needed to perform this search. Please run "Search on Crossref" and, if a DOI is found, try again here'
        )
        return TRX.returnOutput()

    mendeley = Mendeley(Tokens.MendeleyID, client_secret=Tokens.MendeleySecret)
    session = mendeley.start_client_credentials_flow().authenticate()

    try:
        abstract = session.catalog.by_identifier(doi=doi).abstract
    except Exception:
        TRX.addUIMessage("Cannot find document on Mendeley")
    else:
        if abstract:
            # The original referenced an undefined name `title` here, which
            # raised NameError on every successful lookup. Sibling transforms
            # read the title from "title.article"; fall back to the DOI so
            # the entity always has a value.
            title = m.getProperty("title.article") or doi
            new = TRX.addEntity("me.Article", title)
            new.addProperty("abstract", "Abstract", 'loose',
                            abstract.encode('utf-8'))
            TRX.addUIMessage(abstract)
        else:
            # A catalog entry without an abstract would crash on `.encode`.
            TRX.addUIMessage("Mendeley has no abstract for this document")

    logging(TRX.returnOutput(), m.Maltegoxml)
    return TRX.returnOutput()
def trx_searchauthor(m):
    """Search the Mendeley catalog for articles by the given author.

    Reads the "person.fullname" property from ``m``, runs an advanced author
    search and adds one "me.Article" entity per result, weighted by rank.

    Args:
        m: incoming transform request; this function calls
            ``m.getProperty(...)`` and reads ``m.Slider`` and ``m.Maltegoxml``.

    Returns:
        The value of ``TRX.returnOutput()``.
    """
    TRX = MaltegoTransform()
    author_name = m.getProperty("person.fullname")

    mendeley = Mendeley(Tokens.MendeleyID, client_secret=Tokens.MendeleySecret)
    session = mendeley.start_client_credentials_flow().authenticate()
    page = session.catalog.advanced_search(author=author_name).list()

    # Never index past the results actually returned, and never return more
    # than the slider asks for. The original used the slider unclamped as
    # soon as 12+ results were available, which could raise IndexError.
    limit = min(len(page.items), m.Slider)

    for rank in range(limit):
        doc = page.items[rank]
        title = clean_obsession(doc.title)
        new = TRX.addEntity("me.Article", title)
        new.setWeight(100 - rank)
        try:
            new.addProperty("abstract", "Abstract", 'strict',
                            clean_obsession(doc.abstract).encode('utf-8'))
            new.addProperty("year", "Year", 'strict', str(doc.year))
            new.addProperty("DOI", "DOI", 'strict', doc.identifiers['doi'])
            author_names = [
                person.first_name.encode('utf-8') + ' ' +
                person.last_name.encode('utf-8')
                for person in doc.authors
            ]
            new.addProperty("author", "Author", 'strict',
                            clean_obsession('; '.join(author_names)))
            if doc.keywords:
                doc_keywords = [kw.encode('utf-8') for kw in doc.keywords]
                new.addProperty("keywords", "Keywords", 'strict',
                                clean_obsession('; '.join(doc_keywords)))
        except Exception:
            # Best effort: keep the entity even if some metadata is missing.
            TRX.addUIMessage('Article: ' + title +
                             '. Not all fields could be downloaded')

    logging(TRX.returnOutput(), m.Maltegoxml)
    return TRX.returnOutput()
def trx_searchciters(m):
    """Find articles citing the input article via Google Scholar.

    Searches by the "DOI" property when present, otherwise by the
    "title.article" property. The first hit is taken as the article and one
    "me.Article" entity is added per citing publication, up to the slider
    value.

    Args:
        m: incoming transform request; this function calls
            ``m.getProperty(...)`` and reads ``m.slider`` and ``m.Maltegoxml``.

    Returns:
        The value of ``TRX.returnOutput()``.
    """
    TRX = MaltegoTransform()
    title = m.getProperty("title.article")
    if title:
        # Only transliterate when a title is actually present.
        title = unidecode(title)
    DOI = m.getProperty("DOI")
    query = DOI if DOI else title
    if not query:
        TRX.addUIMessage("A title or a DOI is needed to perform this search")
        return TRX.returnOutput()

    search_query = scholarly.search_pubs_query(query)
    try:
        result = next(search_query)
    except StopIteration:
        TRX.addUIMessage(
            """The DOI could not be found on Google Scholar, which very likely means Google Scholar has never heard of this article before"""
        )
        return TRX.returnOutput()

    titlemaybe = result.bib['title']
    TRX.addUIMessage(
        """Title found: %s. If this is not what you were looking for, add the article's DOI and search again"""
        % make_unicode(clean_obsession(titlemaybe)), UIM_INFORM)

    # NOTE(review): trx_searchauthor reads `m.Slider` (capital S) while this
    # function reads `m.slider` - confirm which attribute the request exposes.
    limit = m.slider
    count = 0
    for citation in result.get_citedby():
        if count == limit:
            break

        # Strip every unwanted fragment cumulatively. The original restarted
        # from the raw title on each pass, so only the last entry of
        # `bastardi` was ever removed.
        citing_title = citation.bib['title']
        for unwanted in bastardi:
            citing_title = citing_title.replace(unwanted, '')
        new = TRX.addEntity("me.Article", citing_title.encode('utf-8'))

        authors = '; '.join(citation.bib['author'].split(' and '))
        for unwanted in bastardi:
            authors = authors.replace(unwanted, '')
        new.addProperty("author", "Author", "loose", authors.encode('utf-8'))
        count += 1

    logging(TRX.returnOutput(), m.Maltegoxml)
    return TRX.returnOutput()
def trx_searchtitle(m):
    """Resolve an article title on Crossref and return its metadata.

    Queries the Crossref works endpoint with the "title.article" property,
    takes the first hit and adds a "me.Article" entity with DOI, URL, year
    and author properties. The entity value is set to the title Crossref
    returned.

    Args:
        m: incoming transform request; this function calls
            ``m.getProperty(...)`` and reads ``m.Maltegoxml``.

    Returns:
        The value of ``TRX.returnOutput()``.
    """
    TRX = MaltegoTransform()
    title = m.getProperty("title.article")
    if not title:
        TRX.addUIMessage('A title is needed to search on Crossref')
        return TRX.returnOutput()

    # Let requests URL-encode the query; concatenating the raw title broke
    # on characters such as '&' or '#'.
    r = requests.get('http://api.crossref.org/works',
                     params={'query': title, 'rows': 1})
    items = r.json()['message']['items']  # parse the response only once
    if not items:
        TRX.addUIMessage('No match found on Crossref for this title')
        return TRX.returnOutput()
    item = items[0]

    titler = item['title'][0]
    doi = item['DOI']
    URL = item['URL']

    # Use the first available date, in order of preference, and keep only
    # the year (first element of the first "date-parts" entry). The original
    # stringified the whole structure and stripped characters, which left
    # values such as " 2010 5 1" in the "year" property.
    year = ''
    for date_field in ('published-print', 'published-online', 'created'):
        try:
            year = str(item[date_field]['date-parts'][0][0])
            break
        except (KeyError, IndexError, TypeError):
            continue

    try:
        authori = [
            person['family'].encode('utf-8') + ' ' +
            person['given'].encode('utf-8')
            for person in item['author']
        ]
    except KeyError:
        # No usable author list: fall back to the publisher, as before.
        authori = [item.get('publisher', u'').encode('utf-8')]

    new = TRX.addEntity("me.Article", title)
    new.addProperty("DOI", "DOI", 'strict', str(doi))
    new.addDisplayInformation(
        '<a href="%s"> click here to go to the webpage </a>' % URL, 'DOI URL')
    new.addProperty("url", "url", 'strict', str(URL))
    new.addProperty("year", "Year", 'strict', str(year))
    # Join with "; " (no trailing separator), matching the other transforms.
    new.addProperty("author", "Author", 'loose', '; '.join(authori))
    # Replace the searched title with the one Crossref returned.
    new.setValue(titler.encode('utf-8'))

    logging(TRX.returnOutput(), m.Maltegoxml)
    return TRX.returnOutput()
def trx_basesearch(m):
    """Search BASE (base-search.net) for documents by the given author.

    Queries the BASE HTTP search interface with ``dccreator:"<name>"`` and
    adds one "me.Article" entity per returned document that has a title,
    with year, link, author and abstract attached when present.

    Args:
        m: incoming transform request; this function calls
            ``m.getProperty(...)`` and reads ``m.slider`` and ``m.Maltegoxml``.

    Returns:
        The value of ``TRX.returnOutput()``.
    """
    # NOTE(review): trx_searchauthor reads `m.Slider` (capital S) while this
    # function reads `m.slider` - confirm which attribute the request exposes.
    limit = m.slider
    TRX = MaltegoTransform()
    author = m.getProperty("person.fullname").strip()

    # Pass the query through `params` so requests URL-encodes the name.
    r = requests.get(
        "https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi",
        params={
            'func': 'PerformSearch',
            'format': 'json',
            'query': 'dccreator:"' + author + '"',
            'hits': str(limit),
        })

    for doc in r.json()['response']['docs']:
        doc_title = doc.get('dctitle')
        if doc_title is None:
            # Documents without a title are skipped.
            continue
        new = TRX.addEntity("me.Article", doc_title)

        doc_year = doc.get('dcyear')
        if doc_year is not None:
            new.addProperty("year", "Year", 'strict', str(doc_year))

        # The link was the only field read without a guard; a document
        # lacking it aborted the whole transform with KeyError.
        link = doc.get('dclink')
        if link:
            new.addDisplayInformation(
                '<a href="' + link + '"> ' + link + ' </a>', 'Link to file')

        creators = doc.get('dccreator')
        if isinstance(creators, list):
            new.addProperty("author", "Author", "strict", '; '.join(creators))
        elif isinstance(creators, (str, unicode)):
            new.addProperty("author", "Author", "strict", creators)

        description = doc.get('dcdescription')
        if description is not None:
            new.addProperty("abstract", "Abstract", "strict", description)

    logging(TRX.returnOutput(), m.Maltegoxml)
    return TRX.returnOutput()
def trx_extractauthors(m):
    """Split an article's "author" property into "maltego.Person" entities.

    The property is expected to hold names separated by "; ". Empty
    fragments (for example from a trailing separator) are skipped.

    Args:
        m: incoming transform request; this function calls
            ``m.getProperty(...)`` and reads ``m.Maltegoxml``.

    Returns:
        The value of ``TRX.returnOutput()``.
    """
    TRX = MaltegoTransform()
    try:
        authorss = m.getProperty("author")
    except Exception:
        authorss = None
    # Other transforms in this file treat a missing property as falsy/None;
    # the original only guarded the lookup, so `.split` on a missing value
    # raised AttributeError instead of showing this message.
    if not authorss:
        TRX.addUIMessage('Sorry, it appears the article has no Author')
        return TRX.returnOutput()

    for name in authorss.split("; "):
        # Truthiness check instead of `is not ''`, which compares identity
        # with a literal and is not a reliable emptiness test.
        if name:
            new = TRX.addEntity("maltego.Person", name)
            # NOTE(review): hard-coded local path, looks like a debugging
            # leftover - confirm whether this display info should ship.
            new.addDisplayInformation('<a href="C:\\Users\\carla\\Documents\\aa.pdf"> rrr </a>', 'rrr')

    logging(TRX.returnOutput(), m.Maltegoxml)
    return TRX.returnOutput()
def trx_cermine(m):
    """Send a local file to the Cermine service and return the titles found.

    Reads the file at the "article.path" property, posts its bytes to
    Cermine's extract endpoint and adds one "me.Article" entity for every
    ``article-title`` element in the returned XML.

    Args:
        m: incoming transform request; this function calls
            ``m.getProperty(...)`` and reads ``m.Maltegoxml``.

    Returns:
        The value of ``TRX.returnOutput()``.
    """
    TRX = MaltegoTransform()
    path = str(m.getProperty('article.path'))
    print(path)

    # Close the file deterministically instead of leaking the handle.
    with open(path.replace('"', ''), 'rb') as source:
        data = source.read()

    url = 'http://cermine.ceon.pl/extract.do'
    headers = {'Content-Type': 'application/binary'}
    r = requests.post(url, data=data, headers=headers)

    # `.get` avoids a KeyError when the response carries no content type.
    content_type = r.headers.get('content-type', '')
    if r.status_code != 200 or 'application/xml' not in content_type:
        TRX.addUIMessage(
            'Something went wrong, Cermine could not process your file')
        return TRX.returnOutput()

    # NOTE(review): the response is assumed to be ISO-8859-2 and re-encoded
    # as UTF-8 before parsing - confirm against the service's real encoding.
    testo = r.content.decode('ISO-8859-2').encode('utf8')
    root = ET.fromstring(testo)

    # Only titles are extracted. The original also tried to read an author
    # and a year from each record, but each record only ever held the title,
    # so that branch could never run.
    for node in root.iter('article-title'):
        # itertext() also collects text inside nested markup; `.text` alone
        # stops at the first child element and may be None.
        title = ''.join(node.itertext())
        if title:
            TRX.addEntity('me.Article', title)

    logging(TRX.returnOutput(), m.Maltegoxml)
    return TRX.returnOutput()
def trx_keywords(m):
    """Search Mendeley for articles matching the input article's keywords.

    Splits the "keywords" property on "; " (at most 12 keywords), runs one
    catalog search per keyword and shares the overall result budget evenly
    between keywords. Each result becomes a "me.Article" entity whose link
    label is the keyword that found it.

    Args:
        m: incoming transform request; this function calls
            ``m.getProperty(...)`` and reads ``m.Maltegoxml``.

    Returns:
        The value of ``TRX.returnOutput()``.
    """
    TRX = MaltegoTransform()
    try:
        keywords = m.getProperty("keywords").split('; ')[:12]
    except AttributeError:
        TRX.addUIMessage('Silly, there are no keywords for this article. But you could insert them yourself and then try again!')
        return TRX.returnOutput()

    # Currently fixed; the checks below only matter once this becomes
    # configurable again.
    limit = 12
    print(limit)
    if limit > 40:
        TRX.addUIMessage("Sorry, that's too many! Currently the limit is 12 with the free Maltego, otherwise 50")
        return TRX.returnOutput()
    elif limit > 12:
        # Warn only: the message promises results, so do not return here
        # (the original returned an empty output after this message).
        TRX.addUIMessage("You set a value > than 12. If you are using the free Maltego, you are still going to get 12")

    mendeley = Mendeley(Tokens.MendeleyID, client_secret=Tokens.MendeleySecret)
    session = mendeley.start_client_credentials_flow().authenticate()

    try:
        pages = [session.catalog.search(keyword).list() for keyword in keywords]
    except UnicodeEncodeError:
        TRX.addUIMessage("Sorry, Mendeley doesn't accept keywords with non-latin characters. \nPlease change them yourself and try again")
        return TRX.returnOutput()

    # At least one result per keyword. The original wrote
    # `n_results_per_key == 1` (a comparison), so the floor was never applied.
    n_results_per_key = max(1, limit // len(pages))

    # `pages` holds exactly one entry per keyword, in the same order.
    for keyword, page in zip(keywords, pages):
        wanted = min(len(page.items), n_results_per_key)
        for rank in range(wanted):
            doc = page.items[rank]
            new = TRX.addEntity("me.Article", doc.title.encode('utf-8'))
            new.setWeight(100 - rank)
            new.setLinkLabel(keyword.encode('utf-8'))
            try:
                # NOTE(review): these calls pass True/False as the matching
                # rule while the other transforms pass 'strict'/'loose' -
                # left unchanged; confirm how the library treats booleans.
                new.addProperty("abstract", "Abstract", True,
                                doc.abstract.encode('utf-8'))
                new.addProperty("year", "Year", False, str(doc.year))
                author_names = [
                    person.first_name.encode('utf-8') + ' ' +
                    person.last_name.encode('utf-8')
                    for person in doc.authors
                ]
                new.addProperty("author", "Author", True,
                                '; '.join(author_names))
                doc_keywords = [kw.encode('utf-8') for kw in doc.keywords]
                new.addProperty("keywords", "Keywords", True,
                                '; '.join(doc_keywords))
                new.addProperty("DOI", "DOI", 'strict', doc.identifiers['doi'])
            except Exception:
                # Best effort: keep the entity even if some metadata is missing.
                TRX.addUIMessage('Article: '+doc.title.encode('utf-8')+'. Not all fields could be downloaded, probably keywords..')

    logging(TRX.returnOutput(), m.Maltegoxml)
    return TRX.returnOutput()