Example #1
import json
import math

import digimorph


def get_common_data(result):
  specimen_url = digimorph.get_specimen_url(result['specimen_url'])

  # Only build slice metadata when the specimen actually has CT slice images.
  slice_data = None
  if result.get('slice_count', 0) > 0:
    slice_urls = digimorph.get_slice_urls(specimen_url,
                                          result.get('slice_count', 100),
                                          result.get('zero_padding', 1))
    slice_data = {
      'has_slices': 'slice_count' in result,
      'slice_urls': json.dumps(slice_urls),
      # Despite the key name, this picks the middle slice as a representative image.
      'first_slice': slice_urls[int(math.floor(len(slice_urls)/2))]
    }

  return {
    'title': result.get('scientific_name') if result.get('scientific_name') else result.get('species'), 
    'digimorph_url': specimen_url,
    'imageUrl': digimorph.get_preview_url(specimen_url),
    'slice_data': slice_data,
    'classification': [
        result.get('phylum', None),
        result.get('class', None),
        result.get('order', None),
        result.get('family', None),
        result.get('genus', None)
      ]
  }
Example #2
def get_common_data(result):
    specimen_url = digimorph.get_specimen_url(result['specimen_url'])

    slice_data = None
    if result.get('slice_count', 0) > 0:
        slice_urls = digimorph.get_slice_urls(specimen_url,
                                              result.get('slice_count', 100),
                                              result.get('zero_padding', 1))
        slice_data = {
            'has_slices': 'slice_count' in result,
            'slice_urls': json.dumps(slice_urls),
            'first_slice': slice_urls[int(math.floor(len(slice_urls) / 2))]
        }

    return {
        'title': (result.get('scientific_name')
                  if result.get('scientific_name') else result.get('species')),
        'digimorph_url': specimen_url,
        'imageUrl': digimorph.get_preview_url(specimen_url),
        'slice_data': slice_data,
        'classification': [
            result.get('phylum', None),
            result.get('class', None),
            result.get('order', None),
            result.get('family', None),
            result.get('genus', None)
        ]
    }
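Examples #1 and #2 show the same get_common_data helper. A minimal usage sketch, assuming the digimorph URL helpers accept the arguments exactly as in the snippets above, and using a hypothetical result dict that contains only the keys the function reads:

from pprint import pprint

# Hypothetical search result; every value here is made up for illustration.
result = {
    'specimen_url': 'example-specimen',   # whatever digimorph.get_specimen_url expects
    'scientific_name': 'Felis catus',
    'slice_count': 120,
    'zero_padding': 3,
    'phylum': 'Chordata',
    'class': 'Mammalia',
    'order': 'Carnivora',
    'family': 'Felidae',
    'genus': 'Felis',
}

# With get_common_data defined as above, this prints the template-ready dict.
pprint(get_common_data(result))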
Example #3
    urls.extend(val['urls'])

def get_page(url):
    print "getting " + url
    response = urllib2.urlopen(url)
    return response.read()

species_data = {}

date_re = re.compile('Publication Date:([^<]*)', re.DOTALL)

for u in urls:
    data = {}

    try:
        html = get_page(digimorph.get_specimen_url(u))
        soup = BeautifulSoup(html, 'html.parser')
        
        # Author and institution
        author_link = soup.select('.author a')[0]
        institution = soup.select('.institution')[0]
        data['author_url'] = author_link.get('href').strip()
        data['author_name'] = author_link.get_text().strip()
        data['institution'] = institution.get_text().strip()

        # Image data
        image_processing = soup.body.find_all(string=re.compile('Image processing'))
        image_processing_links = image_processing[0].parent.find_all('a')
        date_string = image_processing[0].parent.contents[-1].get_text()
    except Exception as e:
        # The original listing is truncated here; this minimal handler just closes
        # the dangling try block so the loop can move on to the next specimen.
        print "Failed to scrape %s: %s" % (u, e)
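The date_re pattern defined above is never applied before the listing is cut off. A small standalone sketch of how it could pull the publication date out of a string like date_string (the HTML fragment below is made up for illustration):

import re

date_re = re.compile('Publication Date:([^<]*)', re.DOTALL)

# Hypothetical fragment in the shape the regex expects.
date_string = 'Publication Date: 14 June 2004 <br>'
match = date_re.search(date_string)
if match:
    print match.group(1).strip()   # -> 14 June 2004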
Example #5
    return max_slice - step


all_species = {}
for line in open('url_map.json'):
    all_species.update(json.loads(line))
pprint(all_species)

urls = []
for val in all_species.values():
    urls.extend(val['urls'])

pprint(urls)
slice_data = {}
for u in urls:
    specimen_url = digimorph.get_specimen_url(u)
    padding = 0
    max_slice = 0
    has_slices = True

    print "Finding images for %s" % specimen_url
    print digimorph.get_slice_url(specimen_url, 1, 3)
    # 3 or 4 padding?
    if has_image(digimorph.get_slice_url(specimen_url, 1, 3)):
        padding = 3
        print "Padding 3!"
    elif has_image(digimorph.get_slice_url(specimen_url, 1, 4)):
        padding = 4
        print "Padding 4!"
    else:
        print "Failed to find first image with 3 or 4 padding"