Python Link.Linkの例、perma.models.Link.Link Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_models.py プロジェクト: clare-stanton/perma

    def test_org_link_count_this_year(self):
        """
        An organization's count for the current year should cover links
        created this year and leave out anything older.
        """
        registrar = Registrar()
        registrar.save()
        org = Organization(registrar=registrar)
        org.save()
        # nothing has been linked to the organization yet
        self.assertEqual(org.link_count_this_year(), 0)

        this_year = tz_datetime(timezone.now().year, 1, 1)
        two_years_back = tz_datetime(this_year.year - 2, 1, 1)
        creator = LinkUser()
        creator.save()

        # one link dated two years back, followed by two dated this year
        guids = ["AAAA-AAAA", "BBBB-BBBB", "CCCC-CCCC"]
        timestamps = [two_years_back, this_year, this_year]
        for guid, created_at in zip(guids, timestamps):
            Link(creation_timestamp=created_at,
                 guid=guid,
                 created_by=creator,
                 organization=org).save()

        saved = Link.objects.filter(pk__in=guids)
        self.assertEqual(len(saved), 3)
        self.assertEqual(org.link_count_this_year(), 2)

コード例 #2

0

ファイルを表示

ファイル: test_models.py プロジェクト: clare-stanton/perma

    def test_link_count_valid_period(self):
        """
        Counting links over a time period should only pick up the links
        created within that period.
        """
        this_year = tz_datetime(timezone.now().year, 1, 1)
        two_years_back = tz_datetime(this_year.year - 2, 1, 1)
        three_years_back = tz_datetime(this_year.year - 3, 1, 1)
        creator = LinkUser()
        creator.save()

        guids = [
            "AAAA-AAAA", "BBBB-BBBB", "CCCC-CCCC", "DDDD-DDDD", "EEEE-EEEE"
        ]
        # one link per past boundary date, then three dated this year
        timestamps = [
            three_years_back, two_years_back, this_year, this_year, this_year
        ]
        for guid, created_at in zip(guids, timestamps):
            Link(creation_timestamp=created_at,
                 guid=guid,
                 created_by=creator).save()

        saved = Link.objects.filter(pk__in=guids)
        self.assertEqual(len(saved), 5)

        # the two links dated on the period's start and end are expected
        in_period = link_count_in_time_period(saved, three_years_back,
                                              two_years_back)
        self.assertEqual(in_period, 2)

コード例 #3

0

ファイルを表示

ファイル: test_models.py プロジェクト: clare-stanton/perma

    def test_most_active_org_in_time_period_valid_period(self):
        """
        The most active organization should be determined only from
        links created inside the requested period.
        """
        this_year = tz_datetime(timezone.now().year, 1, 1)
        two_years_back = tz_datetime(this_year.year - 2, 1, 1)
        three_years_back = tz_datetime(this_year.year - 3, 1, 1)

        registrar = Registrar()
        registrar.save()
        first_org = Organization(registrar=registrar)
        first_org.save()
        second_org = Organization(registrar=registrar)
        second_org.save()
        creator = LinkUser()
        creator.save()

        # (guid, creation timestamp, owning organization), in save order
        fixtures = [
            ("AAAA-AAAA", three_years_back, first_org),
            ("BBBB-BBBB", three_years_back, first_org),
            ("CCCC-CCCC", this_year, first_org),
            ("DDDD-DDDD", this_year, second_org),
            ("EEEE-EEEE", this_year, second_org),
        ]
        for guid, created_at, owner in fixtures:
            Link(creation_timestamp=created_at,
                 guid=guid,
                 organization=owner,
                 created_by=creator).save()

        # the first organization owns both of the older links ...
        busiest_in_past = most_active_org_in_time_period(
            registrar.organizations, three_years_back, two_years_back)
        self.assertEqual(busiest_in_past, first_org)

        # ... the second owns two of the three links from two years back onward ...
        busiest_recently = most_active_org_in_time_period(
            registrar.organizations, two_years_back)
        self.assertEqual(busiest_recently, second_org)

        # ... and with three links in total the first organization leads overall
        busiest_overall = most_active_org_in_time_period(
            registrar.organizations)
        self.assertEqual(busiest_overall, first_org)

コード例 #4

0

ファイルを表示

def create_capture_job(user, human=True):
    """
    Save a new Link for http://example.com created by `user`, then save
    and return a pending CaptureJob attached to that link.

    `human` is passed straight through to the CaptureJob.
    """
    new_link = Link(submitted_url="http://example.com", created_by=user)
    new_link.save()

    job_fields = {
        'created_by': user,
        'link': new_link,
        'human': human,
        'status': 'pending',
    }
    job = CaptureJob(**job_fields)
    job.save()
    return job

コード例 #5

0

ファイルを表示

ファイル: test_models.py プロジェクト: clare-stanton/perma

    def test_registrar_link_count_this_year(self):
        """
        A registrar's count for the current year should cover links
        created this year across all of its organizations and leave out
        anything older.
        """
        registrar = Registrar()
        registrar.save()
        first_org = Organization(registrar=registrar)
        first_org.save()
        second_org = Organization(registrar=registrar)
        second_org.save()

        this_year = tz_datetime(timezone.now().year, 1, 1)
        two_years_back = tz_datetime(this_year.year - 2, 1, 1)
        creator = LinkUser()
        creator.save()

        # (guid, creation timestamp, owning organization), in save order
        fixtures = [
            ("AAAA-AAAA", two_years_back, first_org),
            ("BBBB-BBBB", this_year, first_org),
            ("CCCC-CCCC", this_year, first_org),
            ("DDDD-DDDD", this_year, second_org),
        ]
        for guid, created_at, owner in fixtures:
            Link(creation_timestamp=created_at,
                 guid=guid,
                 created_by=creator,
                 organization=owner).save()

        guids = [fixture[0] for fixture in fixtures]
        saved = Link.objects.filter(pk__in=guids)
        self.assertEqual(len(saved), 4)
        self.assertEqual(registrar.link_count_this_year(), 3)

コード例 #6

0

ファイルを表示

ファイル: test_models.py プロジェクト: clare-stanton/perma

    def test_registrar_most_active_org_this_year(self):
        """
        Should return the org (whole object) with the most links
        created this year, or None if it has no orgs with links
        created this year.
        """
        r = Registrar()
        r.save()
        # a registrar without any organizations has no most active org
        self.assertIsNone(r.most_active_org_this_year())

        o1 = Organization(registrar=r)
        o1.save()
        o2 = Organization(registrar=r)
        o2.save()

        now = tz_datetime(timezone.now().year, 1, 1)
        two_years_ago = tz_datetime(now.year - 2, 1, 1)
        user = LinkUser()
        user.save()

        def add_link(guid, created_at, org):
            # save one link owned by `org`, dated `created_at`
            Link(creation_timestamp=created_at,
                 guid=guid,
                 created_by=user,
                 organization=org).save()

        # a link from two years ago must not count towards this year
        add_link("AAAA-AAAA", two_years_ago, o1)
        self.assertIsNone(r.most_active_org_this_year())

        # o1 leads with two links this year against one for o2
        add_link("BBBB-BBBB", now, o1)
        add_link("CCCC-CCCC", now, o1)
        add_link("DDDD-DDDD", now, o2)
        self.assertEqual(r.most_active_org_this_year(), o1)

        # two further links put o2 ahead, three to two
        add_link("EEEE-EEEE", now, o2)
        add_link("FFFF-FFFF", now, o2)
        self.assertEqual(r.most_active_org_this_year(), o2)

コード例 #7

0

ファイルを表示

ファイル: test_models.py プロジェクト: clare-stanton/perma

    def test_link_count_period_equal_dates(self):
        """
        When the period starts and ends on the same date, a link
        created on that date is counted exactly once.
        """
        creator = LinkUser()
        creator.save()
        moment = tz_datetime(timezone.now().year, 1, 1)
        only_link = Link(creation_timestamp=moment,
                         guid="AAAA-AAAA",
                         created_by=creator)
        only_link.save()

        matching = Link.objects.filter(pk=only_link.pk)
        total = len(matching)
        self.assertEqual(total, 1)
        self.assertEqual(link_count_in_time_period(matching, moment, moment),
                         total)

コード例 #8

0

ファイルを表示

    def test_link_count_regular_user(self):
        """ Saving a link should raise its creator's link_count by one,
        and safe-deleting the link should bring the count back down """

        user = self.regular_user
        baseline = user.link_count

        new_link = Link(created_by=user, submitted_url="http://example.com")
        new_link.save()
        user.refresh_from_db()
        self.assertEqual(baseline + 1, user.link_count)

        # the deletion is only tallied once the link is saved again
        new_link.safe_delete()
        new_link.save()
        user.refresh_from_db()
        self.assertEqual(baseline, user.link_count)

コード例 #9

0

ファイルを表示

    def test_link_count_for_orgs(self):
        """ Saving a link for an organization should raise that
        organization's link_count by one, and safe-deleting the link
        should bring the count back down """

        org = self.org_user.organizations.all().first()
        baseline = org.link_count

        new_link = Link(created_by=self.org_user,
                        submitted_url="http://example.com",
                        organization=org)
        new_link.save()
        org.refresh_from_db()
        self.assertEqual(baseline + 1, org.link_count)

        # the deletion is only tallied once the link is saved again
        new_link.safe_delete()
        new_link.save()
        org.refresh_from_db()
        self.assertEqual(baseline, org.link_count)

コード例 #10

0

ファイルを表示

    def test_link_count_for_registrars(self):
        """ Saving a link for one of a registrar's organizations should
        raise the registrar's link_count by one, and safe-deleting the
        link should bring the count back down """

        registrar = self.registrar_user.registrar
        baseline = registrar.link_count
        managed_org = registrar.organizations.all().first()

        new_link = Link(created_by=self.registrar_user,
                        submitted_url="http://example.com",
                        organization=managed_org)
        new_link.save()
        registrar.refresh_from_db()
        self.assertEqual(baseline + 1, registrar.link_count)

        # the deletion is only tallied once the link is saved again
        new_link.safe_delete()
        new_link.save()
        registrar.refresh_from_db()
        self.assertEqual(baseline, registrar.link_count)

コード例 #11

0

ファイルを表示

ファイル: test_link_resource.py プロジェクト: jay1803/perma

    def test_delete_bonus_link(self):
        # the bonus link is created here so the fixtures stay untouched
        user = self.regular_user
        link = Link(created_by=user, bonus_link=True)
        link.save()
        link_url = "{0}/{1}".format(self.list_url, link.pk)

        # before the delete: no bonus links available
        remaining_before, _, bonus_before = user.get_links_remaining()
        self.assertEqual(remaining_before, 6)
        self.assertEqual(bonus_before, 0)

        # remove the bonus link through the API and reload the user
        self.successful_delete(link_url, user=user)
        user.refresh_from_db()

        # after the delete: regular allowance unchanged, one bonus link back
        remaining_after, _, bonus_after = user.get_links_remaining()
        self.assertEqual(remaining_after, 6)
        self.assertEqual(bonus_after, 1)

コード例 #12

0

ファイルを表示

    def validate(self, data):
        """
        Validate the data for creating or updating a link.

        Three groups of checks run, and every failure is collected in one
        dict so they can all be reported together:

        * privacy: on update, non-staff users cannot set `private_reason`
          directly and may only flip `is_private` when the existing reason
          is empty, 'user' or 'old_policy'; on create, `private_reason` is
          derived from `is_private`.
        * URL (create only): `submitted_url` must be present and valid;
          unless a file was uploaded, it must also resolve to an allowed
          IP, return headers, and not report a content-length above
          settings.MAX_ARCHIVE_FILE_SIZE.
        * uploaded file: must not be blank, must not replace the contents
          of a permanent link, must have a recognized and valid type, and
          must not exceed settings.MAX_ARCHIVE_FILE_SIZE.

        Returns `data`, which may have had `private_reason` set or removed.
        Raises serializers.ValidationError with the collected errors,
        keyed by 'is_private', 'url' and/or 'file'.
        """
        user = self.context['request'].user
        errors = {}

        # since 'file' is not a field on the model, we have to access it through request.data rather than data
        uploaded_file = self.context['request'].data.get('file')

        # handle is_private and private_reason:
        if self.instance:
            if not user.is_staff:
                # only staff can manually change private_reason in all cases
                data.pop('private_reason', None)

                # if updating privacy, make sure user is allowed to change private status
                if 'is_private' in data and self.instance.is_private != bool(
                        data['is_private']):
                    if self.instance.private_reason and self.instance.private_reason not in [
                            'user', 'old_policy'
                    ]:
                        errors['is_private'] = 'Cannot change link privacy.'
                    else:
                        data['private_reason'] = 'user' if data[
                            'is_private'] else None
        else:
            # for new links, set private_reason based on is_private
            data['private_reason'] = 'user' if data.get('is_private') else None

        # check submitted URL for new link
        if not self.instance:
            if not data.get('submitted_url'):
                errors['url'] = "URL cannot be empty."
            else:
                try:
                    url_validator = URLValidator()
                    # unsaved Link, used only for its URL-derived attributes
                    temp_link = Link(submitted_url=data['submitted_url'])
                    url_validator(temp_link.ascii_safe_url)

                    # Don't force URL resolution validation if a file is provided
                    if not uploaded_file:
                        if not temp_link.ip:
                            errors['url'] = "Couldn't resolve domain."
                        elif not ip_in_allowed_ip_range(temp_link.ip):
                            errors['url'] = "Not a valid IP."
                        elif not temp_link.headers:
                            errors['url'] = "Couldn't load URL."
                        else:
                            # preemptively reject URLs that report a size over settings.MAX_ARCHIVE_FILE_SIZE
                            try:
                                if int(
                                        temp_link.headers.get(
                                            'content-length', 0)
                                ) > settings.MAX_ARCHIVE_FILE_SIZE:
                                    # floor division keeps the size a whole
                                    # number of MB; true division would print
                                    # e.g. "100.0MB" on Python 3
                                    errors[
                                        'url'] = "Target page is too large (max size %sMB)." % (
                                            settings.MAX_ARCHIVE_FILE_SIZE //
                                            1024 // 1024)
                            except ValueError:
                                # content-length header wasn't an integer. Carry on.
                                pass
                except DjangoValidationError:
                    errors['url'] = "Not a valid URL."
                except TooManyRedirects:
                    errors['url'] = "URL caused a redirect loop."

        # check uploaded file
        if uploaded_file == '':
            errors['file'] = "File cannot be blank."
        elif uploaded_file:

            if self.instance and self.instance.is_permanent():
                errors[
                    'file'] = "Archive contents cannot be replaced after 24 hours"

            else:
                mime_type = get_mime_type(uploaded_file.name)

                # reject the file when get_mime_type() gave nothing for its
                # name, or when the 'valid_file' check registered for that
                # type in mime_type_lookup fails
                if not mime_type or not mime_type_lookup[mime_type][
                        'valid_file'](uploaded_file):
                    errors['file'] = "Invalid file."
                elif uploaded_file.size > settings.MAX_ARCHIVE_FILE_SIZE:
                    errors['file'] = "File is too large."

        if errors:
            raise serializers.ValidationError(errors)

        return data

コード例 #13

0

ファイルを表示

ファイル: tasks.py プロジェクト: leppert/perma

def update_perma(link_guid):
    """
    Bring the local copy of a perma link in line with the upstream server.

    The link's status metadata is fetched from upstream. If no Link with
    this GUID exists locally, its asset archive is downloaded, unpacked
    and stored, and Link and Asset records are created. In either case
    the dark_archived and vested flags are then copied from the metadata,
    and downstream mirrors are poked if any are configured.
    """
    # NOTE: both upstream URLs are built as
    #     settings.UPSTREAM_SERVER['address'] + reverse(<url pattern>)
    # which reuses the patterns in urls.py but bakes in the assumption that
    # the upstream server is another Perma instance.

    # Step 1: status metadata. It says where the assets belong, and asking
    # for it doubles as a quick check that the GUID is real.
    status_url = settings.UPSTREAM_SERVER['address'] + reverse(
        "service_link_status", args=(link_guid,))
    status_response = requests.get(
        status_url,
        headers=settings.UPSTREAM_SERVER.get('headers', {}))
    metadata = status_response.json()

    # Step 2: decide whether the assets are needed. Having a Link for this
    # GUID is taken to mean the assets are already here.
    try:
        link = Link.objects.get(guid=link_guid)
    except Link.DoesNotExist:
        archive_url = settings.UPSTREAM_SERVER['address'] + reverse(
            "mirroring:link_assets", args=(link_guid,))

        # a relative path is fine here because we're in run_in_tempdir()
        zip_name = 'temp.zip'

        # stream the archive to disk so large files never sit in RAM
        download = requests.get(
            archive_url,
            headers=settings.UPSTREAM_SERVER.get('headers', {}),
            stream=True)
        with open(zip_name, 'wb') as zip_out:
            for piece in download.iter_content(1024):
                zip_out.write(piece)

        # unpacking creates a folder named [guid] in the current temp dir
        with zipfile.ZipFile(zip_name, "r") as archive:
            archive.extractall()
        extracted_dir = os.path.basename(metadata['path'])  # e.g. "1234-ABCD"

        # copy every extracted file into default_storage below the parent
        # of the metadata path, e.g. 2014/6/10/18/37/1234-ABCD/cap.png
        storage_parent = os.path.dirname(metadata['path'])
        for folder, _subfolders, filenames in os.walk(extracted_dir):
            for filename in filenames:
                local_path = os.path.join(folder, filename)  # e.g. "1234-ABCD/cap.png"
                storage_path = os.path.join(storage_parent, local_path)
                with open(local_path, 'rb') as local_file:
                    default_storage.store_file(local_file, storage_path)

        # the archive ships the remaining fields needed to build the Link
        with open(os.path.join(extracted_dir, "metadata.json"), "r") as meta_file:
            link_metadata = json.load(meta_file)

        link = Link(guid=link_guid)
        link.submitted_url = link_metadata["submitted_url"]
        link.submitted_title = link_metadata["submitted_title"]
        link.created_by = None  # XXX maybe we should do something with FakeUser here
        # saved right away so that an Asset can point at it
        link.save(pregenerated_guid=True)

        # Workaround: creation_timestamp is auto_now_add=True, so the first
        # save always stamps the current time; set the real value and save
        # a second time.
        link.creation_timestamp = unserialize_datetime(
            link_metadata["creation_timestamp"])
        link.save()

        # finally, record where the captures for this Link live
        asset = Asset(link=link)
        capture_fields = (
            ("base_storage_path", "path"),
            ("image_capture", "image_capture"),
            ("warc_capture", "source_capture"),
            ("pdf_capture", "pdf_capture"),
            ("text_capture", "text_capture"),
        )
        for attribute, metadata_key in capture_fields:
            setattr(asset, attribute, metadata[metadata_key])
        asset.save()

    # Step 3: copy the status flags from the metadata onto the Link
    link.dark_archived = metadata["dark_archived"]
    link.vested = metadata["vested"]
    link.save()

    # Step 4: if we have sub-mirrors, poke them to get a copy from us
    if settings.DOWNSTREAM_SERVERS:
        run_task(poke_mirrors, link_guid=link_guid)