Example #1
0
def proxy_header(request, target_url):
    """ Proxy a HEAD request so as to get around CORS
        issues for displaying PDFs with javascript
        and other needs

        Sends an HTTP HEAD request to target_url and reports what the
        remote server says about the resource.

        :param request: the incoming request object (not read here)
        :param target_url: URL that receives the HEAD request
        :return: an HttpResponse. On success the body is JSON with
            'status' and 'url' plus 'Content-Length' and 'Content-Type'
            when the remote server supplies them. On any failure the
            body is plain text and the response status is the remote
            status code (404 when no response was received).
    """
    # NOTE(review): target_url is requested exactly as given; if it can
    # come from untrusted users, consider restricting which hosts may be
    # proxied.
    gapi = GeneralAPI()
    status_code = 404  # used when the request never gets a response
    try:
        # A timeout keeps a stalled remote server from hanging this view.
        r = requests.head(target_url,
                          timeout=240,
                          headers=gapi.client_headers)
        status_code = r.status_code
        r.raise_for_status()
        output = {'status': status_code, 'url': target_url}
        if 'Content-Length' in r.headers:
            output['Content-Length'] = int(float(r.headers['Content-Length']))
        if 'Content-Type' in r.headers:
            output['Content-Type'] = r.headers['Content-Type']
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are not swallowed.
        content = target_url + ' ' + str(status_code)
        return HttpResponse('Fail with HTTP status: ' + str(content),
                            status=status_code,
                            content_type='text/plain')
    json_output = json.dumps(output, indent=4, ensure_ascii=False)
    return HttpResponse(json_output,
                        content_type='application/json; charset=utf8')
Example #2
0
 def update_metadata(self, deposition_id, metadata_dict):
     """ Updates metadata for a deposition

         Sends an HTTP PUT with {'metadata': metadata_dict} serialized
         as JSON to the deposition's URL under self.url_prefix.

         :param deposition_id: identifier of the deposition (converted
             to str when building the URL)
         :param metadata_dict: metadata to send; must be JSON
             serializable
         :return: the HTTP status code on success, False on any failure
     """
     gapi = GeneralAPI()
     # Work on a copy so the Content-Type added below is not written
     # into the header mapping held by GeneralAPI.
     # NOTE(review): assumes client_headers is a dict-like mapping that
     # may be shared between GeneralAPI users -- confirm.
     headers = dict(gapi.client_headers)
     headers['Content-Type'] = 'application/json'
     deposition_id = str(deposition_id)
     url = self.url_prefix + '/api/deposit/depositions/%s' % deposition_id
     data = {'metadata': metadata_dict}
     if self.delay_before_request > 0:
         # default to sleep BEFORE a request is sent, to
         # give the remote service a break.
         sleep(self.delay_before_request)
     r = None  # stays None if the request fails before a response exists
     try:
         r = requests.put(url,
                          timeout=240,
                          headers=headers,
                          params={'access_token': self.ACCESS_TOKEN},
                          data=json.dumps(data))
         r.raise_for_status()
         output = r.status_code
     except Exception as e:
         output = False
         if r is None:
             # connection error, timeout, or the payload could not be
             # serialized: there is no response to report on
             print('FAIL to update metadata, no response: ' + str(e))
         else:
             print('FAIL to update metadata with status code: ' +
                   str(r.status_code))
             try:
                 print(str(r.json()))
             except ValueError:
                 # the error body is not JSON, show it as text instead
                 print(r.text)
     return output
Example #3
0
 def create_empty_deposition(self):
     """ Makes a new empty deposition container
         to receive files and metadata

         Sends an HTTP POST with an empty JSON object to
         self.url_prefix + '/api/deposit/depositions'.

         :return: the parsed JSON response on success, False on any
             failure
     """
     gapi = GeneralAPI()
     # Work on a copy so the Content-Type added below is not written
     # into the header mapping held by GeneralAPI.
     # NOTE(review): assumes client_headers is a dict-like mapping that
     # may be shared between GeneralAPI users -- confirm.
     headers = dict(gapi.client_headers)
     headers['Content-Type'] = 'application/json'
     url = self.url_prefix + '/api/deposit/depositions'
     if self.delay_before_request > 0:
         # default to sleep BEFORE a request is sent, to
         # give the remote service a break.
         sleep(self.delay_before_request)
     r = None  # stays None if the request fails before a response exists
     try:
         r = requests.post(url,
                           timeout=240,
                           headers=headers,
                           params={'access_token': self.ACCESS_TOKEN},
                           json={})
         r.raise_for_status()
         print('Status code: ' + str(r.status_code))
         output = r.json()
     except Exception as e:
         output = False
         if r is None:
             # connection error or timeout: no response to report on
             print('FAIL with no response: ' + str(e))
         else:
             print('FAIL with Status code: ' + str(r.status_code))
             try:
                 print(str(r.json()))
             except ValueError:
                 # the body is not JSON, show it as text instead
                 print(r.text)
     return output
Example #4
0
 def get_deposition_meta_by_id(self, deposition_id):
     """ Gets a deposition metadata object via a
         request for a JSON object from Zenodo

         :param deposition_id: identifier of the deposition (converted
             to str when building the URL)
         :return: the parsed JSON response on success, False on any
             failure
     """
     gapi = GeneralAPI()
     # Work on a copy so the Content-Type added below is not written
     # into the header mapping held by GeneralAPI.
     # NOTE(review): assumes client_headers is a dict-like mapping that
     # may be shared between GeneralAPI users -- confirm.
     headers = dict(gapi.client_headers)
     headers['Content-Type'] = 'application/json'
     if self.delay_before_request > 0:
         # default to sleep BEFORE a request is sent, to
         # give the remote service a break.
         sleep(self.delay_before_request)
     deposition_id = str(deposition_id)
     url = self.url_prefix + '/api/deposit/depositions/%s' % deposition_id
     r = None  # stays None if the request fails before a response exists
     try:
         r = requests.get(url,
                          timeout=240,
                          headers=headers,
                          params={'access_token': self.ACCESS_TOKEN})
         r.raise_for_status()
         output = r.json()
     except Exception as e:
         output = False
         if r is None:
             # connection error or timeout: no response to report on
             print('FAIL with no response: ' + str(e))
         else:
             print('FAIL with Status code: ' + str(r.status_code))
             try:
                 print(str(r.json()))
             except ValueError:
                 # the body is not JSON, show it as text instead
                 print(r.text)
         print('URL: ' + url)
     return output
Example #5
0
def proxy(request, target_url):
    """ Proxy request so as to get around CORS
        issues for displaying PDFs with javascript
        and other needs

        Fetches target_url with an HTTP GET and relays the body.

        :param request: the incoming request object (not read here)
        :param target_url: URL to fetch; it is rewritten to use
            'http:' before the request is made
        :return: an HttpResponse with the remote content, status code
            and Content-Type on success. On any failure a plain-text
            response whose status is the remote status code (404 when
            no response was received).
    """
    # NOTE(review): target_url is requested as given (after the scheme
    # rewrite below); if it can come from untrusted users, consider
    # restricting which hosts may be proxied.
    gapi = GeneralAPI()
    if 'https:' in target_url:
        # NOTE(review): every 'https:' in the URL is downgraded to
        # 'http:' -- confirm this is still wanted.
        target_url = target_url.replace('https:', 'http:')
    if 'http://' not in target_url:
        # presumably repairs a URL whose '//' was collapsed to '/'
        # before it reached this view -- verify against the URL routing
        target_url = target_url.replace('http:/', 'http://')
    status_code = 404  # used when the request never gets a response
    print('Try to see: ' + target_url)
    try:
        r = requests.get(target_url, timeout=240, headers=gapi.client_headers)
        status_code = r.status_code
        r.raise_for_status()
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are not swallowed.
        content = target_url + ' ' + str(status_code)
        return HttpResponse('Fail with HTTP status: ' + str(content),
                            status=status_code,
                            content_type='text/plain')
    # A remote server may omit Content-Type; fall back to a generic
    # binary type instead of raising KeyError.
    mimetype = r.headers.get('Content-Type', 'application/octet-stream')
    return HttpResponse(r.content, status=status_code, content_type=mimetype)
Example #6
0
 def get_json_for_geonames_uri(self, geonames_uri):
     """
     Gets JSON data for a geonames_uri

     The last '/'-separated segment of the cleaned URI is appended to
     self.json_base_url to build the request URL.

     :param geonames_uri: URI identifying the entity to look up
     :return: the parsed JSON, or False if the request or the JSON
         parsing failed. The same value is stored in self.json_data;
         on success self.request_url holds the URL that was requested.
     """
     le_gen = LinkEntityGeneration()
     # strip off any cruft in the URI
     geonames_uri = le_gen.make_clean_uri(geonames_uri)
     geonames_id = geonames_uri.split('/')[-1]
     url = self.json_base_url + str(geonames_id)
     if self.delay_before_request > 0:
         # default to sleep BEFORE a request is sent, to
         # give the remote service a break.
         sleep(self.delay_before_request)
     try:
         gapi = GeneralAPI()
         r = requests.get(url,
                          timeout=240,
                          headers=gapi.client_headers)
         r.raise_for_status()
         self.request_url = r.url
         json_r = r.json()
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit still stop a long-running harvest.
         json_r = False
     self.json_data = json_r
     return self.json_data
Example #7
0
 def upload_file_by_put(self, bucket_url, filename, full_path_file):
     """ Uploads a file of filename, stored at full_path_file
         into a Zenodo deposit at location bucket_url

         The file is sent as the body of an HTTP PUT to
         bucket_url + '/' + filename, authorized with a Bearer token
         built from self.ACCESS_TOKEN.

         :param bucket_url: URL of the bucket receiving the file
         :param filename: name the file gets inside the bucket
         :param full_path_file: local path of the file to upload
         :return: the parsed JSON response on success; False if the
             local file does not exist or the upload failed
     """
     if not os.path.exists(full_path_file):
         # can't find the file to upload!
         return False
     # we found the file to upload
     if self.delay_before_request > 0:
         # default to sleep BEFORE a request is sent, to
         # give the remote service a break.
         sleep(self.delay_before_request)
     url = bucket_url + '/' + filename
     r = None  # stays None if the request fails before a response exists
     try:
         # for bigger files, use this PUT method
         # Adapted from: https://github.com/zenodo/zenodo/issues/833#issuecomment-324760423
         gapi = GeneralAPI()
         # Work on a copy so the Authorization token and the other
         # headers added below are not written into the header mapping
         # held by GeneralAPI.
         # NOTE(review): assumes client_headers is a dict-like mapping
         # that may be shared between GeneralAPI users -- confirm.
         headers = dict(gapi.client_headers)
         headers['Accept'] = 'application/json'
         headers['Authorization'] = 'Bearer ' + self.ACCESS_TOKEN
         headers['Content-Type'] = 'application/octet-stream'
         # The with-block closes the file handle even when the upload
         # raises.
         with open(full_path_file, 'rb') as file_obj:
             r = requests.put(url,
                              headers=headers,
                              data=file_obj)
         r.raise_for_status()
         output = r.json()
     except Exception as e:
         output = False
         if r is None:
             # connection error or unreadable file: no response to
             # report on
             print('FAIL with no response: ' + str(e))
         else:
             print('FAIL with Status code: ' + str(r.status_code))
             try:
                 print(str(r.json()))
             except ValueError:
                 # the body is not JSON, show it as text instead
                 print(r.text)
         print('URL: ' + url)
     return output
Example #8
0
 def get_list_records(self, url, resumption_token=None):
     """
     Gets OAI-PMH list records, with an optional resumption_token

     :param url: URL to request; 'verb=ListRecords' is added as a
         query parameter when the URL does not already contain it
     :param resumption_token: optional str, appended to the URL as
         the 'resumptionToken' query argument
     :return: the raw response content (bytes) on success, None on
         failure. self.request_url is set to the requested URL once a
         response is received; self.request_error is set to True on
         failure.
     """
     params = None
     if 'verb=ListRecords' not in url:
         params = {'verb': 'ListRecords'}
     if isinstance(resumption_token, str):
         # appended by hand to avoid URL encoding the resumption token
         separator = '&' if '?' in url else '?'
         url += separator + 'resumptionToken=' + resumption_token
     if self.delay_before_request > 0:
         # default to sleep BEFORE a request is sent, to
         # give the remote service a break.
         sleep(self.delay_before_request)
     url_content = None
     try:
         gapi = GeneralAPI()
         # params=None is the same as passing no params, so one request
         # call covers both cases.
         r = requests.get(url,
                          params=params,
                          timeout=240,
                          headers=gapi.client_headers)
         self.request_url = r.url
         r.raise_for_status()
         url_content = r.content
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit still stop a long-running harvest.
         self.request_error = True
         url_content = None
     return url_content
Example #9
0
 def cache_page_locally(self, url, payload, act_dir, filename):
     """ Caches content of a page locally if successfully downloaded

         :param url: URL to download; skipped when it is already in
             self.fail_urls
         :param payload: query parameters passed to the GET request
         :param act_dir: directory handed to
             self.define_import_directory_file and self.save_as_json_file
         :param filename: name of the local cache file
         :return: True when the page was downloaded and written to
             disk, otherwise False. URLs that fail to download are
             appended to self.fail_urls, URLs that cannot be saved to
             self.save_fails; both lists are saved as JSON files.
     """
     ok = False
     if url in self.fail_urls:
         # already known to fail, do not request it again
         return ok
     if self.delay_before_request > 0:
         # default to sleep BEFORE a request is sent, to
         # give the remote service a break.
         sleep(self.delay_before_request)
     file_path = self.define_import_directory_file(act_dir,
                                                   filename)
     try:
         gapi = GeneralAPI()
         r = requests.get(url,
                          params=payload,
                          timeout=240,
                          headers=gapi.client_headers)
         self.request_url = r.url
         r.encoding = 'utf-8'
         r.raise_for_status()
         # r.text decodes the body with the encoding set above;
         # str(r.content) would write the bytes repr ("b'...'") instead
         # of the page itself.
         content = r.text
         saved = False
         print('Working in: ' + self.current_location)
         print('Attempting to save: ' + url)
         try:
             with open(file_path, 'w', encoding='utf-8') as f:
                 f.write(content)
             saved = True
         except Exception as e:
             print('Save fail: ' + str(e))
             saved = False
         if saved is False:
             # second attempt with the text passed through unidecode
             content = unidecode(content)
             try:
                 with open(file_path, 'w', encoding='utf-8') as f:
                     f.write(content)
                 # record the success so the retry is not reported as
                 # a save failure
                 saved = True
             except Exception as e:
                 print('Save fail attempt 2: ' + str(e))
                 saved = False
         if saved:
             ok = True
         else:
             print('CANNOT SAVE: ' + file_path)
             self.save_fails.append(url)
             self.save_as_json_file(act_dir, self.save_fail_file, self.save_fails)
             ok = False
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit still stop a long-running harvest.
         ok = False
         self.fail_urls.append(url)
         self.save_as_json_file(act_dir, self.fail_url_file, self.fail_urls)
     return ok
Example #10
0
    def upload_file_by_post(self,
                            deposition_id,
                            filename,
                            full_path_file,
                            ok_if_exists=True):
        """ Uploads a file of filename, stored at full_path_file
            into a Zenodo deposit with deposition_id

            will respond with an OK if it already exists

            This works by POST and is NOT the preferred method

            :param deposition_id: identifier of the deposition
                (converted to str when building the URL)
            :param filename: name sent as the 'filename' form field
            :param full_path_file: local path of the file to upload
            :param ok_if_exists: when True, a failure whose JSON
                'message' is 'Filename already exists.' counts as
                success
            :return: the parsed JSON response on success; True when
                the file already exists and ok_if_exists is set; False
                if the local file is missing or the upload failed
        """
        if not os.path.exists(full_path_file):
            # can't find the file to upload!
            return False
        # we found the file to upload
        gapi = GeneralAPI()
        headers = gapi.client_headers
        if self.delay_before_request > 0:
            # default to sleep BEFORE a request is sent, to
            # give the remote service a break.
            sleep(self.delay_before_request)
        data = {'filename': filename}
        deposition_id = str(deposition_id)
        url = self.url_prefix + '/api/deposit/depositions/%s/files' % deposition_id
        r = None  # stays None if the request fails before a response exists
        try:
            # for bigger files, this will not work routinely
            # See fix at: https://github.com/zenodo/zenodo/issues/833
            with open(full_path_file, 'rb') as f:
                # stream the upload of the files, which can be really big!
                r = requests.post(
                    url,
                    timeout=240,
                    headers=headers,
                    params={'access_token': self.ACCESS_TOKEN},
                    data=data,
                    files={'file': f})
            r.raise_for_status()
            output = r.json()
        except Exception as e:
            output = False
            # Parse the error body once, tolerating a missing response
            # or a body that is not JSON.
            err_json = None
            if r is not None:
                try:
                    err_json = r.json()
                except ValueError:
                    err_json = None
            if ok_if_exists \
               and isinstance(err_json, dict) \
               and err_json.get('message') == 'Filename already exists.':
                print('File already exists, with status code: ' +
                      str(r.status_code))
                output = True
            if output is False:
                # some other reason for failure
                if r is None:
                    print('FAIL with no response: ' + str(e))
                else:
                    print('FAIL with Status code: ' + str(r.status_code))
                    if err_json is not None:
                        print(str(err_json))
                    else:
                        print(r.text)
                print('URL: ' + url)
        return output
Example #11
0
 def search_admin_entity(self,
                         q_str,
                         admin_level=0,
                         username='******',
                         lat=None,
                         lon=None,
                         degree_dif=.5):
     """ Searches for an entity of a given administrative type
         associated for a given q_str

         :param q_str: query string sent as the 'q' parameter
         :param admin_level: 0 tries feature codes 'PCLI' then 'OCN',
             1 uses 'ADM1', 2 uses 'ADM2'; any other value searches
             without a feature code
         :param username: sent as the 'username' parameter
         :param lat: optional latitude; a bounding box is only added
             when lat, lon and degree_dif are all floats
         :param lon: optional longitude, see lat
         :param degree_dif: half-width of the bounding box in degrees
         :return: the parsed JSON of the first request that succeeds,
             or None when every request fails
     """
     output = None
     all_params = {
         'q': q_str,
         'username': username,
         'maxRows': 1,
     }
     if isinstance(lat, float) \
        and isinstance(lon, float) \
        and isinstance(degree_dif, float):
         # Bounding box centered on (lat, lon): east and north take the
         # larger coordinate, west and south the smaller one.
         all_params['east'] = lon + degree_dif
         all_params['west'] = lon - degree_dif
         all_params['south'] = lat - degree_dif
         all_params['north'] = lat + degree_dif
     if admin_level == 0:
         fcodes = ['PCLI',
                   'OCN']
     elif admin_level == 1:
         fcodes = ['ADM1']
     elif admin_level == 2:
         fcodes = ['ADM2']
     else:
         fcodes = [None]
     for fcode in fcodes:
         # fresh copy per request so one attempt's fcode cannot carry
         # over into the shared parameters
         params = dict(all_params)
         if isinstance(fcode, str):
             params['fcode'] = fcode
         if self.delay_before_request > 0:
             # default to sleep BEFORE a request is sent, to
             # give the remote service a break.
             sleep(self.delay_before_request)
         try:
             gapi = GeneralAPI()
             r = requests.get(self.SEARCH_BASE_URL,
                              params=params,
                              timeout=10,
                              headers=gapi.client_headers)
             r.raise_for_status()
             json_r = r.json()
         except Exception:
             # Exception (not a bare except) so that KeyboardInterrupt
             # and SystemExit are not swallowed.
             json_r = False
         if json_r is not False:
             output = json_r
             break
     return output
Example #12
0
 def get_turtle_text(self, url):
     """ Gets the turtle manifest as
         a string

         :param url: URL to request
         :return: the response text on success, False on any failure
     """
     try:
         gapi = GeneralAPI()
         r = requests.get(url, timeout=240, headers=gapi.client_headers)
         r.raise_for_status()
         turtle = r.text
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit are not swallowed.
         print('Failed to get ' + url)
         turtle = False
     return turtle
Example #13
0
 def request_json_str(self, url):
     """ Requests JSON from a URL and returns it as a string

         :param url: URL to request
         :return: the JSON re-serialized with a 4-space indent when
             the response parses to a dict; None when the request
             fails or the parsed JSON is not a dict
     """
     json_output = None
     try:
         gapi = GeneralAPI()
         r = requests.get(url, timeout=240, headers=gapi.client_headers)
         r.raise_for_status()
         json_obj = r.json()
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit are not swallowed.
         json_obj = False
     if isinstance(json_obj, dict):
         json_output = json.dumps(json_obj, indent=4, ensure_ascii=False)
     return json_output
Example #14
0
 def get_periodo_data(self):
     """
     Gets json-ld data from Periodo

     Requests self.data_url and parses the response as JSON.

     :return: the parsed JSON on success, False on any failure. The
         same value is stored in self.periodo_data.
     """
     url = self.data_url
     try:
         gapi = GeneralAPI()
         r = requests.get(url, timeout=240, headers=gapi.client_headers)
         r.raise_for_status()
         json_r = r.json()
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit are not swallowed.
         json_r = False
     self.periodo_data = json_r
     return json_r
Example #15
0
 def get_unit_json(self, unit_id):
     """
     Gets json data for one unit, requested from
     self.base_json_url + unit_id

     NOTE(review): the earlier docstring described this as a tDAR
     keyword search; the code requests a single unit by id -- confirm
     the remote service.

     :param unit_id: identifier appended to self.base_json_url
         (converted to str so numeric ids also work)
     :return: the parsed JSON on success, False on any failure.
         self.request_url is set once a response is received;
         self.request_error is set to True on failure.
     """
     url = self.base_json_url + str(unit_id)
     try:
         gapi = GeneralAPI()
         r = requests.get(url, timeout=240, headers=gapi.client_headers)
         self.request_url = r.url
         r.raise_for_status()
         json_r = r.json()
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit are not swallowed.
         self.request_error = True
         json_r = False
     return json_r
Example #16
0
 def get_remote_text_from_url(self, url):
     """ Gets remote text content from a URL

         :param url: URL to request
         :return: the response text on success, False on any failure.
             self.request_url is set once a response is received;
             self.request_error is set to True on failure.
     """
     if self.delay_before_request > 0:
         # default to sleep BEFORE a request is sent, to
         # give the remote service a break.
         sleep(self.delay_before_request)
     try:
         gapi = GeneralAPI()
         r = requests.get(url, timeout=240, headers=gapi.client_headers)
         self.request_url = r.url
         r.raise_for_status()
         text = r.text
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit are not swallowed.
         self.request_error = True
         text = False
     return text
Example #17
0
 def get_read_csv(self, url):
     """
     Gets CSV data from a URL and wraps it in a csv.reader

     The response text is split on newlines and handed to csv.reader.

     :param url: URL of the CSV resource
     :return: a csv.reader over the response lines on success, False
         on any failure. The same value is stored in self.csv_data.
     """
     if self.delay_before_request > 0:
         # default to sleep BEFORE a request is sent, to
         # give the remote service a break.
         sleep(self.delay_before_request)
     try:
         gapi = GeneralAPI()
         r = requests.get(url, timeout=240, headers=gapi.client_headers)
         r.raise_for_status()
         csvfile = r.text.split('\n')
         self.csv_data = csv.reader(csvfile)
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit are not swallowed.
         self.csv_data = False
     return self.csv_data
Example #18
0
 def get_search_json(self, url):
     """
     Gets json data from Open Context in response to a keyword search

     :param url: URL to request
     :return: the parsed JSON on success, False on any failure.
         self.request_url is set once a response is received;
         self.request_error is set to True on failure.
     """
     if self.delay_before_request > 0:
         # default to sleep BEFORE a request is sent, to
         # give the remote service a break.
         sleep(self.delay_before_request)
     try:
         gapi = GeneralAPI()
         r = requests.get(url, timeout=240, headers=gapi.client_headers)
         self.request_url = r.url
         r.raise_for_status()
         json_r = r.json()
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit are not swallowed.
         self.request_error = True
         json_r = False
     return json_r
Example #19
0
 def get_search_html(self, url):
     """
     Get HTML from Open Context from a URL, do nothing with the data
     however.

     :param url: URL to request
     :return: True when the request completes without an HTTP error
         status, False on any failure
     """
     gapi = GeneralAPI()
     headers = gapi.client_headers
     if self.delay_before_request > 0:
         # default to sleep BEFORE a request is sent, to
         # give the remote service a break.
         sleep(self.delay_before_request)
     try:
         r = requests.get(url,
                          timeout=240,
                          headers=headers)
         r.raise_for_status()
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit are not swallowed.
         return False
     return True
Example #20
0
 def get_save_legacy_csv(self, table_id):
     """ Gets and saves the legacy csv files
         from open context

         Downloads self.LEGACY_TAB_BASE_URI + table_id + '.csv' and
         writes the text to a file named table_id + '.csv' in the
         directory returned by self.set_check_directory(self.table_dir).
         Nothing is written when the download fails.

         :param table_id: str identifier of the table
         :return: None
     """
     if self.delay_before_request > 0:
         # default to sleep BEFORE a request is sent, to
         # give the remote service a break.
         sleep(self.delay_before_request)
     dir_file = self.set_check_directory(self.table_dir) + table_id + '.csv'
     url = self.LEGACY_TAB_BASE_URI + table_id + '.csv'
     print('Working on :' + url)
     try:
         gapi = GeneralAPI()
         r = requests.get(url,
                          timeout=480,
                          headers=gapi.client_headers)
         r.raise_for_status()
         text = r.text
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit are not swallowed.
         text = False
     if text is not False:
         # the with-block closes the file even if the write fails
         with open(dir_file, 'w', encoding='utf-8') as f:
             f.write(text)
Example #21
0
 def get_search_json(self, url):
     """
     Gets json data from Open Context search API

     :param url: URL to request; sent with an
         'accept: application/json' header
     :return: the parsed JSON on success, False on any failure. A
         failing url is appended to self.request_errors.
     """
     gapi = GeneralAPI()
     # Work on a copy so the accept header added below is not written
     # into the header mapping held by GeneralAPI.
     # NOTE(review): assumes client_headers is a dict-like mapping that
     # may be shared between GeneralAPI users -- confirm.
     headers = dict(gapi.client_headers)
     headers['accept'] = 'application/json'
     if self.delay_before_request > 0:
         # default to sleep BEFORE a request is sent, to
         # give the remote service a break.
         sleep(self.delay_before_request)
     try:
         r = requests.get(url,
                          timeout=240,
                          headers=headers)
         r.raise_for_status()
         json_r = r.json()
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit are not swallowed.
         self.request_errors.append(url)
         json_r = False
     return json_r
Example #22
0
 def get_keyword_search_json(self, keyword):
     """
     Gets json data from Arachne in response to a keyword search

     :param keyword: search term sent as the 'q' parameter; when
         self.filter_by_images is truthy an 'fq' filter of
         'facet_image:ja' is added
     :return: the parsed JSON on success, False on any failure. The
         same value is stored in self.arachne_json_r.
     """
     payload = {'q': keyword}
     if self.filter_by_images:
         payload['fq'] = 'facet_image:ja'
     url = self.DEFAULT_API_BASE_URL
     try:
         gapi = GeneralAPI()
         r = requests.get(url,
                          params=payload,
                          timeout=240,
                          headers=gapi.client_headers)
         self.set_arachne_search_urls(r.url)
         r.raise_for_status()
         json_r = r.json()
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit are not swallowed.
         json_r = False
     self.arachne_json_r = json_r
     return json_r
Example #23
0
 def get_basic_json_from_uri(self, orcid_uri):
     """
     Gets json data from the ORCID URI

     The request URL comes from self.make_orcid_api_url(orcid_uri).

     :param orcid_uri: URI to look up
     :return: the parsed JSON on success, False on any failure. The
         same value is stored in self.json_data; on success
         self.response_headers and self.request_url are also set.
     """
     url = self.make_orcid_api_url(orcid_uri)
     if self.delay_before_request > 0:
         # default to sleep BEFORE a request is sent, to
         # give the remote service a break.
         sleep(self.delay_before_request)
     try:
         gapi = GeneralAPI()
         # Work on a copy so the Accept header added below is not
         # written into the header mapping held by GeneralAPI.
         # NOTE(review): assumes client_headers is a dict-like mapping
         # that may be shared between GeneralAPI users -- confirm.
         headers = dict(gapi.client_headers)
         headers['Accept'] = 'application/json'
         r = requests.get(url, timeout=240, headers=headers)
         r.raise_for_status()
         self.response_headers = r.headers
         self.request_url = r.url
         json_r = r.json()
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit are not swallowed.
         json_r = False
     self.json_data = json_r
     return self.json_data
Example #24
0
 def get_jsonld_for_uri(self, uri):
     """
     Gets json-ld data from the OCHRE URI

     The cleaned URI with '.jsonld' appended is the request URL.

     :param uri: URI of the entity to look up
     :return: the parsed JSON on success, False on any failure. The
         same value is stored in self.json_data. self.request_url
         holds the URL built here, replaced by the URL reported by
         the response when the request succeeds.
     """
     le_gen = LinkEntityGeneration()
     uri = le_gen.make_clean_uri(uri)  # strip off any cruft in the URI
     url = uri + '.jsonld'
     self.request_url = url
     if self.delay_before_request > 0:
         # default to sleep BEFORE a request is sent, to
         # give the remote service a break.
         sleep(self.delay_before_request)
     try:
         gapi = GeneralAPI()
         r = requests.get(url, timeout=240, headers=gapi.client_headers)
         r.raise_for_status()
         self.request_url = r.url
         json_r = r.json()
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit are not swallowed.
         json_r = False
     self.json_data = json_r
     return self.json_data
Example #25
0
 def get_keyword_search_json(self, keyword, keyword_type):
     """
     Gets json data from tDAR in response to a keyword search

     :param keyword: sent as the 'term' query parameter
     :param keyword_type: sent as the 'keywordType' query parameter
     :return: the parsed JSON on success, False on any failure.
         self.request_url is set once a response is received;
         self.request_error is set to True on failure.
     """
     if self.delay_before_request > 0:
         # default to sleep BEFORE a request is sent, to
         # give the remote service a break.
         sleep(self.delay_before_request)
     payload = {'term': keyword, 'keywordType': keyword_type}
     url = self.KEYWORD_API_BASE_URL
     try:
         gapi = GeneralAPI()
         r = requests.get(url,
                          params=payload,
                          timeout=240,
                          headers=gapi.client_headers)
         self.request_url = r.url
         r.raise_for_status()
         json_r = r.json()
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit are not swallowed.
         self.request_error = True
         json_r = False
     return json_r
Example #26
0
 def get_trench_book_index(self):
     """
     Gets the trench book index

     Requests self.trench_book_index_url with the query parameter
     'oc' set to True.

     :return: the response text on success. On any failure
         self.request_error is set to True and the result of
         self.get_trench_book_index_from_file() is returned instead.
     """
     if self.delay_before_request > 0:
         # default to sleep BEFORE a request is sent, to
         # give the remote service a break.
         sleep(self.delay_before_request)
     payload = {'oc': True}
     url = self.trench_book_index_url
     try:
         gapi = GeneralAPI()
         r = requests.get(url,
                          params=payload,
                          timeout=240,
                          headers=gapi.client_headers)
         self.request_url = r.url
         r.raise_for_status()
         content = r.text
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit are not swallowed.
         self.request_error = True
         content = self.get_trench_book_index_from_file()
     return content
Example #27
0
 def get_arachne_json(self, payload):
     """
     Executes a search for json data from arachne

     :param payload: dict of query parameters; it is copied before
         the image filter is added, so the caller's dict is left
         unchanged. Any non-dict value makes the search fail without
         sending a request.
     :return: the parsed JSON on success, False on any failure. The
         same value is stored in self.arachne_json_r.
     """
     json_r = False
     if isinstance(payload, dict):
         # copy so the 'fq' filter is not written into the caller's dict
         params = dict(payload)
         if self.filter_by_images:
             params['fq'] = 'facet_image:"ja"'
         url = self.DEFAULT_API_BASE_URL
         try:
             gapi = GeneralAPI()
             r = requests.get(url,
                              params=params,
                              timeout=240,
                              headers=gapi.client_headers)
             print('r url: ' + r.url)
             self.set_arachne_search_urls(r.url)
             r.raise_for_status()
             json_r = r.json()
         except Exception:
             # Exception (not a bare except) so that KeyboardInterrupt
             # and SystemExit are not swallowed.
             json_r = False
     self.arachne_json_r = json_r
     return json_r