Ejemplo n.º 1
0
def save_bare_tags(page_size=5000):
    """Copy MODM tags into Django ``Tag`` rows, one page per transaction.

    Tags are read from ``MODMTag`` sorted by descending ``_id`` in slices of
    ``page_size`` and written with ``Tag.objects.bulk_create``; every created
    row gets ``system=False``.  Progress, timings and the final MODM/Django
    counts are printed.

    :param page_size: number of tags read and bulk-created per transaction.
    """
    func_name = sys._getframe().f_code.co_name
    print('Starting {}...'.format(func_name))
    count = 0
    start = datetime.now()
    total = MODMTag.find().count()

    while count < total:
        with transaction.atomic():
            page = MODMTag.find().sort('-_id')[count:count + page_size]
            tags = [
                Tag(_id=modm_tag._id, lower=modm_tag.lower, system=False)
                for modm_tag in page
            ]
            if not tags:
                # The source returned fewer rows than ``total`` promised;
                # stop here instead of looping forever on an empty page.
                break

            then = datetime.now()
            print('Saving tags {} through {}...'.format(
                count, count + len(tags)))
            woot = Tag.objects.bulk_create(tags)
            now = datetime.now()
            print('Done with {} tags in {} seconds...'.format(
                len(woot), (now - then).total_seconds()))
            # Advance by what was actually read so a short page is still
            # saved and the next slice starts right after it.
            count += len(tags)
            # Drop the references before collecting to keep memory flat.
            tags = None
            woot = None
            trash = gc.collect()
            print('Took out {} trashes'.format(trash))

    print('MODM Tags: {}'.format(total))
    print('django Tags: {}'.format(Tag.objects.all().count()))
    print('Done with {} in {} seconds...'.format(
        func_name, (datetime.now() - start).total_seconds()))
Ejemplo n.º 2
0
def save_bare_tags(page_size=5000):
    """Copy MODM tags into Django ``Tag`` rows, one page per transaction.

    Tags are read from ``MODMTag`` sorted by descending ``_id`` in slices of
    ``page_size`` and written with ``Tag.objects.bulk_create``; every created
    row gets ``system=False``.  Progress, timings and the final MODM/Django
    counts are printed.

    :param page_size: number of tags read and bulk-created per transaction.
    """
    func_name = sys._getframe().f_code.co_name
    print('Starting {}...'.format(func_name))
    count = 0
    start = datetime.now()
    total = MODMTag.find().count()

    while count < total:
        with transaction.atomic():
            page = MODMTag.find().sort('-_id')[count:count + page_size]
            tags = [
                Tag(_id=modm_tag._id, lower=modm_tag.lower, system=False)
                for modm_tag in page
            ]
            if not tags:
                # The source returned fewer rows than ``total`` promised;
                # stop here instead of looping forever on an empty page.
                break

            then = datetime.now()
            print('Saving tags {} through {}...'.format(
                count, count + len(tags)))
            woot = Tag.objects.bulk_create(tags)
            now = datetime.now()
            print('Done with {} tags in {} seconds...'.format(
                len(woot), (now - then).total_seconds()))
            # Advance by what was actually read so a short page is still
            # saved and the next slice starts right after it.
            count += len(tags)
            # Drop the references before collecting to keep memory flat.
            tags = None
            woot = None
            trash = gc.collect()
            print('Took out {} trashes'.format(trash))

    print('MODM Tags: {}'.format(total))
    print('django Tags: {}'.format(Tag.objects.all().count()))
    print('Done with {} in {} seconds...'.format(
        func_name, (datetime.now() - start).total_seconds()))
Ejemplo n.º 3
0
 def test_add_tag(self):
     # A file that has been tagged must show up once in the tag search.
     tagged_file = self.root.append_file('That\'s How Strong My Love Is.mp3')
     redding = Tag(_id='Redding')
     redding.save()
     tagged_file.tags.append(redding)
     tagged_file.save()
     search_response = query_tag_file('Redding')
     assert_equal(len(search_response['results']), 1)
Ejemplo n.º 4
0
 def test_add_tag(self):
     # A file that has been tagged must show up once in the tag search.
     tagged_file = self.root.append_file('That\'s How Strong My Love Is.mp3')
     redding = Tag(_id='Redding', name='Redding')
     redding.save()
     tagged_file.tags.add(redding)
     tagged_file.save()
     search_response = query_tag_file('Redding')
     assert_equal(len(search_response['results']), 1)
Ejemplo n.º 5
0
 def test_file_remove_tag(self):
     # Deleting a tag through the API must detach it from the file.
     tag_name = 'Graduation'
     stored_file = self.node_settings.get_root().append_file('Champion.mp3')
     graduation = Tag(name=tag_name)
     graduation.save()
     stored_file.tags.add(graduation)
     stored_file.save()
     assert_in(tag_name, stored_file.tags.values_list('name', flat=True))

     remove_url = self.project.api_url_for(
         'osfstorage_remove_tag', fid=stored_file._id)
     self.app.delete_json(remove_url, {'tag': tag_name}, auth=self.user.auth)

     stored_file.reload()
     assert_not_in(tag_name, stored_file.tags.values_list('name', flat=True))
Ejemplo n.º 6
0
    def test_file_add_tag_fail_doesnt_create_log(self, mock_log):
        # Re-adding a tag the file already carries must fail with a 400
        # and must not write a log entry.
        tag_name = 'The Life of Pablo'
        stored_file = self.node_settings.get_root().append_file('UltraLightBeam.mp3')
        existing_tag = Tag(_id=tag_name)
        existing_tag.save()
        stored_file.tags.append(existing_tag)
        stored_file.save()

        add_url = self.project.api_url_for('osfstorage_add_tag', fid=stored_file._id)
        response = self.app.post_json(
            add_url,
            {'tag': tag_name},
            auth=self.user.auth,
            expect_errors=True,
        )

        assert_equal(response.status_code, 400)
        mock_log.assert_not_called()
Ejemplo n.º 7
0
 def test_tag_the_same_tag(self):
     # Posting a tag that is already on the file must be reported as a failure.
     tag_name = 'Run_the_Jewels'
     stored_file = self.node_settings.get_root().append_file('Lie,Cheat,Steal.mp3')
     existing_tag = Tag(_id=tag_name)
     existing_tag.save()
     stored_file.tags.append(existing_tag)
     stored_file.save()
     assert_in(tag_name, stored_file.tags)

     add_url = self.project.api_url_for('osfstorage_add_tag', fid=stored_file._id)
     response = self.app.post_json(
         add_url,
         {'tag': tag_name},
         auth=self.user.auth,
         expect_errors=True,
     )
     assert_equal(response.status_code, 400)
     assert_equal(response.json['status'], 'failure')
Ejemplo n.º 8
0
    def test_file_add_tag_fail_doesnt_create_log(self, mock_log):
        # Re-adding a tag the file already carries must fail with a 400
        # and must not write a log entry.
        tag_name = 'The Life of Pablo'
        stored_file = self.node_settings.get_root().append_file('UltraLightBeam.mp3')
        existing_tag = Tag(_id=tag_name)
        existing_tag.save()
        stored_file.tags.append(existing_tag)
        stored_file.save()

        add_url = self.project.api_url_for('osfstorage_add_tag', fid=stored_file._id)
        response = self.app.post_json(
            add_url,
            {'tag': tag_name},
            auth=self.user.auth,
            expect_errors=True,
        )

        assert_equal(response.status_code, 400)
        mock_log.assert_not_called()
Ejemplo n.º 9
0
 def test_file_remove_tag(self):
     # Deleting a tag through the API must detach it from the file.
     tag_name = 'Graduation'
     stored_file = self.node_settings.get_root().append_file('Champion.mp3')
     graduation = Tag(_id=tag_name)
     graduation.save()
     stored_file.tags.append(graduation)
     stored_file.save()
     assert_in(tag_name, stored_file.tags)

     remove_url = self.project.api_url_for(
         'osfstorage_remove_tag', fid=stored_file._id)
     self.app.delete_json(remove_url, {'tag': tag_name}, auth=self.user.auth)

     stored_file.reload()
     assert_not_in(tag_name, stored_file.tags)
Ejemplo n.º 10
0
 def test_tag_the_same_tag(self):
     # Posting a tag that is already on the file must be reported as a failure.
     tag_name = 'Run_the_Jewels'
     stored_file = self.node_settings.get_root().append_file('Lie,Cheat,Steal.mp3')
     existing_tag = Tag(_id=tag_name)
     existing_tag.save()
     stored_file.tags.append(existing_tag)
     stored_file.save()
     assert_in(tag_name, stored_file.tags)

     add_url = self.project.api_url_for('osfstorage_add_tag', fid=stored_file._id)
     response = self.app.post_json(
         add_url,
         {'tag': tag_name},
         auth=self.user.auth,
         expect_errors=True,
     )
     assert_equal(response.status_code, 400)
     assert_equal(response.json['status'], 'failure')
Ejemplo n.º 11
0
    def test_file_remove_tag_creates_log(self):
        # A successful tag removal must leave a 'file_tag_removed' entry as
        # the node's most recent log.
        tag_name = 'You that when you cause all this conversation'
        stored_file = self.node_settings.get_root().append_file('Formation.flac')
        existing_tag = Tag(_id=tag_name)
        existing_tag.save()
        stored_file.tags.append(existing_tag)
        stored_file.save()

        remove_url = self.project.api_url_for(
            'osfstorage_remove_tag', fid=stored_file._id)
        response = self.app.delete_json(
            remove_url, {'tag': tag_name}, auth=self.user.auth)

        assert_equal(response.status_code, 200)
        self.node.reload()
        latest_log = self.node.logs[-1]
        assert_equal(latest_log.action, 'file_tag_removed')
Ejemplo n.º 12
0
    def test_file_remove_tag_creates_log(self):
        # A successful tag removal must leave a 'file_tag_removed' entry as
        # the node's most recent log.
        tag_name = 'You that when you cause all this conversation'
        stored_file = self.node_settings.get_root().append_file('Formation.flac')
        existing_tag = Tag(_id=tag_name)
        existing_tag.save()
        stored_file.tags.append(existing_tag)
        stored_file.save()

        remove_url = self.project.api_url_for(
            'osfstorage_remove_tag', fid=stored_file._id)
        response = self.app.delete_json(
            remove_url, {'tag': tag_name}, auth=self.user.auth)

        assert_equal(response.status_code, 200)
        self.node.reload()
        latest_log = self.node.logs[-1]
        assert_equal(latest_log.action, 'file_tag_removed')
Ejemplo n.º 13
0
 def test_remove_tag(self):
     # The tag search finds the file while it is tagged and stops finding
     # it once the tag has been removed.
     tagged_file = self.root.append_file('I\'ve Been Loving You Too Long.mp3')
     blue = Tag(_id='Blue')
     blue.save()
     tagged_file.tags.append(blue)
     tagged_file.save()
     assert_equal(len(query_tag_file('Blue')['results']), 1)

     tagged_file.tags.remove('Blue')
     tagged_file.save()
     assert_equal(len(query_tag_file('Blue')['results']), 0)
Ejemplo n.º 14
0
 def test_remove_tag(self):
     # The tag search finds the file while it is tagged and stops finding
     # it once the tag has been removed.
     tagged_file = self.root.append_file('I\'ve Been Loving You Too Long.mp3')
     blue = Tag(_id='Blue', name='Blue')
     blue.save()
     tagged_file.tags.add(blue)
     tagged_file.save()
     assert_equal(len(query_tag_file('Blue')['results']), 1)

     tagged_file.tags.remove(blue)
     tagged_file.save()
     assert_equal(len(query_tag_file('Blue')['results']), 0)
Ejemplo n.º 15
0
 def add_tag(self, tag, auth, save=True, log=True):
     """Attach ``tag`` to this file unless it is already there.

     Nothing happens when the tag is already in ``self.tags`` or when the
     owning node is a registration.  Otherwise the ``Tag`` record is loaded
     (or built with ``_id=tag``), saved, and appended to ``self.tags``.

     :param tag: tag identifier to attach.
     :param auth: passed through to ``add_tag_log``.
     :param save: save this object after the change.
     :param log: record a ``FILE_TAG_ADDED`` log entry.
     :return: ``True`` if the tag was added, ``False`` otherwise.
     """
     from website.models import Tag, NodeLog  # Prevent import error
     if tag in self.tags or self.node.is_registration:
         return False

     tag_record = Tag.load(tag) or Tag(_id=tag)
     tag_record.save()
     self.tags.append(tag_record)
     if log:
         self.add_tag_log(NodeLog.FILE_TAG_ADDED, tag, auth)
     if save:
         self.save()
     return True
Ejemplo n.º 16
0
 def add_tag(self, tag, auth, save=True, log=True):
     """Attach ``tag`` to this file unless it is already there.

     Nothing happens when the tag is already in ``self.tags`` or when the
     owning node is a registration.  Otherwise the ``Tag`` record is loaded
     (or built with ``_id=tag``), saved, and appended to ``self.tags``.

     :param tag: tag identifier to attach.
     :param auth: passed through to ``add_tag_log``.
     :param save: save this object after the change.
     :param log: record a ``FILE_TAG_ADDED`` log entry.
     :return: ``True`` if the tag was added, ``False`` otherwise.
     """
     from website.models import Tag, NodeLog  # Prevent import error
     if tag in self.tags or self.node.is_registration:
         return False

     tag_record = Tag.load(tag) or Tag(_id=tag)
     tag_record.save()
     self.tags.append(tag_record)
     if log:
         self.add_tag_log(NodeLog.FILE_TAG_ADDED, tag, auth)
     if save:
         self.save()
     return True
Ejemplo n.º 17
0
def conference_submissions(**kwargs):
    """Recount the submissions of every meeting and cache the totals.

    For each conference not flagged ``is_meeting=False``, the public,
    non-deleted nodes carrying a non-system tag whose name matches the
    conference endpoint (``iexact``) are counted.  The count is stored in
    ``Conference.num_submissions`` and all counts are written with a single
    ``bulk_update``.

    :return: ``{'success': True}``
    """
    meetings = Conference.find(Q('is_meeting', 'ne', False))
    #  TODO: Revisit this loop, there has to be a way to optimize it
    for meeting in meetings:
        # For efficiency, we filter by tag first, then node
        # instead of doing a single Node query
        tag_filter = (
            Q('system', 'eq', False)
            & Q('name', 'iexact', meeting.endpoint.lower())
        )
        tag_pks = Tag.find(tag_filter).values_list('pk', flat=True)

        node_filter = (
            Q('tags', 'in', tag_pks)
            & Q('is_public', 'eq', True)
            & Q('is_deleted', 'ne', True)
        )
        tagged_nodes = Node.find(node_filter).include('guids')

        # Cache the number of distinct submissions
        meeting.num_submissions = len(set(tagged_nodes))
    bulk_update(meetings, update_fields=['num_submissions'])
    return {'success': True}
Ejemplo n.º 18
0
def conference_submissions(**kwargs):
    """Return data for all OSF4M submissions.

    The total number of submissions for each meeting is calculated and cached
    in the Conference.num_submissions field.

    :return: ``{'submissions': [...]}`` with one rendered entry per public,
        non-deleted node tagged with a conference endpoint, newest
        ``dateCreated`` first.
    """
    submissions = []
    #  TODO: Revisit this loop, there has to be a way to optimize it
    for conf in Conference.find():
        # For efficiency, we filter by tag first, then node
        # instead of doing a single Node query
        tags = Tag.find(Q('lower', 'eq', conf.endpoint.lower())).get_keys()
        nodes = Node.find(
            Q('tags', 'in', tags) &
            Q('is_public', 'eq', True) &
            Q('is_deleted', 'ne', True)
        )
        projects = set(nodes)

        submissions.extend(
            _render_conference_node(node, idx, conf)
            for idx, node in enumerate(projects))
        # Cache the number of submissions
        conf.num_submissions = len(projects)
        conf.save()
        # The former trailing CONFERENCE_MIN_COUNT check only executed
        # ``continue`` as the last statement of the loop, so it was removed.
    submissions.sort(key=lambda submission: submission['dateCreated'], reverse=True)
    return {'submissions': submissions}
Ejemplo n.º 19
0
def conference_view(**kwargs):
    """Collect meeting summaries and rendered submissions for all conferences.

    A submission is rendered for every public, non-deleted node reachable
    through a tag whose ``_id`` matches the conference endpoint (``iexact``).
    A conference is listed under ``meetings`` only when it has at least
    ``settings.CONFERENCE_MIN_COUNT`` such nodes.

    :return: dict with ``meetings`` (sorted by descending ``count``) and
        ``submissions`` (sorted by descending ``dateCreated``).
    """
    meetings, submissions = [], []
    for conf in Conference.find():
        # For efficiency, we filter by tag first, then node
        # instead of doing a single Node query
        projects = set()
        for tag in Tag.find(Q('_id', 'iexact', conf.endpoint)):
            projects.update(
                node for node in tag.node__tagged
                if node and node.is_public and not node.is_deleted
            )

        submissions.extend(
            _render_conference_node(node, idx, conf)
            for idx, node in enumerate(projects)
        )
        num_submissions = len(projects)
        if num_submissions >= settings.CONFERENCE_MIN_COUNT:
            meetings.append({
                'name': conf.name,
                'active': conf.active,
                'url': web_url_for('conference_results', meeting=conf.endpoint),
                'count': num_submissions,
            })

    submissions.sort(key=lambda submission: submission['dateCreated'], reverse=True)
    meetings.sort(key=lambda meeting: meeting['count'], reverse=True)

    return {'meetings': meetings, 'submissions': submissions}
Ejemplo n.º 20
0
def conference_submissions(**kwargs):
    """Return data for all OSF4M submissions.

    The total number of submissions for each meeting is calculated and cached
    in the Conference.num_submissions field.

    :return: ``{'submissions': [...]}`` with one rendered entry per public,
        non-deleted node tagged with a conference endpoint, newest
        ``dateCreated`` first.
    """
    submissions = []
    visible = Q('is_public', 'eq', True) & Q('is_deleted', 'eq', False)
    for conf in Conference.find():
        # For efficiency, we filter by tag first, then node
        # instead of doing a single Node query
        projects = set()
        for tag in Tag.find(Q('lower', 'eq', conf.endpoint.lower())):
            projects.update(tag.node__tagged.find(visible))

        submissions.extend(
            _render_conference_node(node, idx, conf)
            for idx, node in enumerate(projects))
        # Cache the number of submissions
        conf.num_submissions = len(projects)
        conf.save()
        # The former trailing CONFERENCE_MIN_COUNT check only executed
        # ``continue`` as the last statement of the loop, so it was removed.
    submissions.sort(key=lambda submission: submission['dateCreated'], reverse=True)
    return {'submissions': submissions}
Ejemplo n.º 21
0
def conference_submissions(**kwargs):
    """Return data for all OSF4M submissions.

    The total number of submissions for each meeting is calculated and cached
    in the Conference.num_submissions field.  Conferences explicitly flagged
    with ``is_meeting = False`` are skipped.

    :return: ``{'submissions': [...]}`` with one rendered entry per public,
        non-deleted node tagged with a conference endpoint, newest
        ``dateCreated`` first.
    """
    submissions = []
    #  TODO: Revisit this loop, there has to be a way to optimize it
    for conf in Conference.find():
        if getattr(conf, 'is_meeting', None) is False:
            # Skip only this conference.  The previous ``break`` stopped at
            # the first non-meeting and silently dropped every conference
            # that came after it.
            continue
        # For efficiency, we filter by tag first, then node
        # instead of doing a single Node query
        tags = Tag.find(Q('lower', 'eq', conf.endpoint.lower())).get_keys()
        nodes = Node.find(
            Q('tags', 'in', tags) & Q('is_public', 'eq', True)
            & Q('is_deleted', 'ne', True))
        projects = set(nodes)

        submissions.extend(
            _render_conference_node(node, idx, conf)
            for idx, node in enumerate(projects))
        # Cache the number of submissions
        conf.num_submissions = len(projects)
        conf.save()
        # The former trailing CONFERENCE_MIN_COUNT check only executed
        # ``continue`` as the last statement of the loop, so it was removed.
    submissions.sort(key=lambda submission: submission['dateCreated'],
                     reverse=True)
    return {'submissions': submissions}
Ejemplo n.º 22
0
 def remove_tag(self, tag, auth, save=True, log=True):
     """Detach ``tag`` from this file if it is currently attached.

     Nothing happens when the tag cannot be loaded, is not in ``self.tags``,
     or the owning node is a registration.

     :param tag: tag identifier to detach.
     :param auth: passed through to ``add_tag_log``.
     :param save: save this object after the change.
     :param log: record a ``FILE_TAG_REMOVED`` log entry.
     :return: ``True`` if the tag was removed, ``False`` otherwise.
     """
     from website.models import Tag, NodeLog  # Prevent import error
     existing = Tag.load(tag)
     if not existing or existing not in self.tags or self.node.is_registration:
         return False

     self.tags.remove(existing)
     if log:
         self.add_tag_log(NodeLog.FILE_TAG_REMOVED, existing._id, auth)
     if save:
         self.save()
     return True
Ejemplo n.º 23
0
 def remove_tag(self, tag, auth, save=True, log=True):
     """Detach ``tag`` from this file if it is currently attached.

     Nothing happens when the tag cannot be loaded, is not in ``self.tags``,
     or the owning node is a registration.

     :param tag: tag identifier to detach.
     :param auth: passed through to ``add_tag_log``.
     :param save: save this object after the change.
     :param log: record a ``FILE_TAG_REMOVED`` log entry.
     :return: ``True`` if the tag was removed, ``False`` otherwise.
     """
     from website.models import Tag, NodeLog  # Prevent import error
     existing = Tag.load(tag)
     if not existing or existing not in self.tags or self.node.is_registration:
         return False

     self.tags.remove(existing)
     if log:
         self.add_tag_log(NodeLog.FILE_TAG_REMOVED, existing._id, auth)
     if save:
         self.save()
     return True
Ejemplo n.º 24
0
def verify_tags(node, modm_node):
    """Assert that a Django node carries the same tags as its MODM source.

    Regular tags are compared by the sorted, de-duplicated primary keys of
    the MODM node's tags (keeping only keys that ``MODMTag.load`` resolves)
    against the ``_id`` values of the Django node's non-system tags.  System
    tags are compared the same way.

    :param node: the Django node.
    :param modm_node: the MODM node it was created from.
    :raises AssertionError: if either tag set differs.
    """
    modm_primary_keys = sorted(set(modm_node.tags._to_primary_keys()))
    # Keys that no longer resolve to a MODM tag are ignored.
    modm_tag_keys = [key for key in modm_primary_keys if MODMTag.load(key)]

    django_tag_ids = node.tags.filter(system=False).values_list('_id', flat=True)
    django_tag_keys = sorted(set(django_tag_ids))

    modm_system_tag_keys = sorted(set(modm_node.system_tags))

    django_system_tag_ids = node.system_tags.values_list('_id', flat=True)
    django_system_tag_keys = sorted(set(django_system_tag_ids))

    assert modm_tag_keys == django_tag_keys, (
        'Modm tags {} don\'t match django tags {} in node {}:{}'.format(
            modm_tag_keys, django_tag_keys, modm_node._id, node._guid.guid))
    assert modm_system_tag_keys == django_system_tag_keys, (
        'Modm system tag keys {} don\'t match django system tags {}'.format(
            modm_system_tag_keys, django_system_tag_keys))
Ejemplo n.º 25
0
def verify_tags(node, modm_node):
    """Assert that a Django node carries the same tags as its MODM source.

    Regular tags are compared by the sorted, de-duplicated primary keys of
    the MODM node's tags (keeping only keys that ``MODMTag.load`` resolves)
    against the ``_id`` values of the Django node's non-system tags.  System
    tags are compared the same way.

    :param node: the Django node.
    :param modm_node: the MODM node it was created from.
    :raises AssertionError: if either tag set differs.
    """
    modm_primary_keys = sorted(set(modm_node.tags._to_primary_keys()))
    # Keys that no longer resolve to a MODM tag are ignored.
    modm_tag_keys = [key for key in modm_primary_keys if MODMTag.load(key)]

    django_tag_ids = node.tags.filter(system=False).values_list('_id', flat=True)
    django_tag_keys = sorted(set(django_tag_ids))

    modm_system_tag_keys = sorted(set(modm_node.system_tags))

    django_system_tag_ids = node.system_tags.values_list('_id', flat=True)
    django_system_tag_keys = sorted(set(django_system_tag_ids))

    assert modm_tag_keys == django_tag_keys, (
        'Modm tags {} don\'t match django tags {} in node {}:{}'.format(
            modm_tag_keys, django_tag_keys, modm_node._id, node._guid.guid))
    assert modm_system_tag_keys == django_system_tag_keys, (
        'Modm system tag keys {} don\'t match django system tags {}'.format(
            modm_system_tag_keys, django_system_tag_keys))
Ejemplo n.º 26
0
    def remove_tag(self, tag, auth, save=True, log=True):
        """Detach ``tag`` from this file, raising when that is not possible.

        :param tag: tag identifier to detach.
        :param auth: passed through to ``add_tag_log``.
        :param save: save this object after the change.
        :param log: record a ``FILE_TAG_REMOVED`` log entry.
        :return: ``True`` once the tag has been removed.
        :raises NodeStateError: the owning node is a registration.
        :raises InvalidTagError: ``Tag.load`` found no such tag.
        :raises TagNotFoundError: the tag is not attached to this file.
        """
        from website.models import Tag, NodeLog  # Prevent import error
        if self.node.is_registration:
            # Can't perform edits on a registration
            raise NodeStateError

        existing = Tag.load(tag)
        if not existing:
            raise InvalidTagError
        if existing not in self.tags:
            raise TagNotFoundError

        self.tags.remove(existing)
        if log:
            self.add_tag_log(NodeLog.FILE_TAG_REMOVED, existing._id, auth)
        if save:
            self.save()
        return True
Ejemplo n.º 27
0
    def remove_tag(self, tag, auth, save=True, log=True):
        """Detach ``tag`` from this file, raising when that is not possible.

        :param tag: tag identifier to detach.
        :param auth: passed through to ``add_tag_log``.
        :param save: save this object after the change.
        :param log: record a ``FILE_TAG_REMOVED`` log entry.
        :return: ``True`` once the tag has been removed.
        :raises NodeStateError: the owning node is a registration.
        :raises InvalidTagError: ``Tag.load`` found no such tag.
        :raises TagNotFoundError: the tag is not attached to this file.
        """
        from website.models import Tag, NodeLog  # Prevent import error
        if self.node.is_registration:
            # Can't perform edits on a registration
            raise NodeStateError

        existing = Tag.load(tag)
        if not existing:
            raise InvalidTagError
        if existing not in self.tags:
            raise TagNotFoundError

        self.tags.remove(existing)
        if log:
            self.add_tag_log(NodeLog.FILE_TAG_REMOVED, existing._id, auth)
        if save:
            self.save()
        return True
Ejemplo n.º 28
0
    def parse(self):
        """Scrape one randomly chosen raw post and store the parsed result.

        Downloads the post's ``original_url`` and fills in the entry-content
        HTML, title, date, author, tags, first image and first paragraph,
        then marks the post as no longer raw and saves it.  A raw post whose
        page has no ``div.entry-content`` is deleted.  An already-parsed post
        with the same title is deleted before the new one is saved.

        Returns ``None``; does nothing when there are no raw posts.
        """
        raw_posts = Post.objects.filter(is_raw=True)
        if not raw_posts:
            # random.choice raises IndexError on an empty sequence.
            return
        raw_post = random.choice(raw_posts)

        page = self.get_html(raw_post.original_url)
        soup = BeautifulSoup(page, "html.parser")
        content = str(soup.find("div", class_="entry-content"))
        # str(None) is "None" when the content container is missing.
        if content == "None" or not content:
            raw_post.delete()
            return
        raw_post.html = content

        # (name, value) pairs: name is the second-to-last path segment of
        # the category link, value is the link's first child.
        category_links = soup.findAll("a", {"rel": "category tag"})
        tags = [(link['href'].split('/')[-2], link.contents[0])
                for link in category_links]

        images = soup.findAll("img")
        # NOTE(review): index 1 skips the first <img> on the page --
        # presumably a site logo/header image; confirm against the markup.
        raw_post.first_image = images[1]['src']
        raw_post.first_text = str(soup.findAll("p")[0].contents[0])

        raw_post.title = self.reg_find(r"<h1 class=\"entry-title\">(.+?)</h1>", page)[0]

        same_post = Post.objects.filter(title=raw_post.title, is_raw=False).first()
        if same_post:
            same_post.delete()

        # The regex only matches dd.mm.yyyy, so parse that form directly.
        post_date = self.reg_find(r">.*(\d\d\.\d\d\.\d\d\d\d)</time>", page)[0]
        raw_post.post_date = datetime.datetime.strptime(post_date, '%d.%m.%Y')

        author = self.reg_find(r"class=\"url fn n\">(.+?)</a>", page)[0]
        matching_authors = Author.objects.filter(name=author)
        if matching_authors:
            post_author = matching_authors[0]
        else:
            post_author = Author(name=author)
            post_author.save()
        raw_post.author = post_author

        tag_names = [name for name, _ in tags]
        # Build the known-name set once; adding new names to it keeps a tag
        # that is listed twice on the page from being created twice.
        known_names = {tag.name for tag in Tag.objects.filter(name__in=tag_names)}
        for name, value in tags:
            if name not in known_names:
                Tag(name=name, value=value).save()
                known_names.add(name)

        post_tags = Tag.objects.filter(name__in=tag_names)

        raw_post.is_raw = False
        raw_post.save()
        raw_post.tags.set(post_tags)

        print("post %s saved" % str(raw_post))
Ejemplo n.º 29
0
def do_migration():
    """Set ``lower`` on every tag to the lowercased ``_id`` and force-save it."""
    for tag in Tag.find():
        logger.info('Migrating tag {!r}'.format(tag))
        lowered_id = tag._id.lower()
        tag.lower = lowered_id
        tag.save(force=True)
Ejemplo n.º 30
0
def do_migration():
    """Set ``lower`` on every tag to the lowercased ``_id`` and force-save it."""
    for tag in Tag.find():
        logger.info('Migrating tag {!r}'.format(tag))
        lowered_id = tag._id.lower()
        tag.lower = lowered_id
        tag.save(force=True)