Example #1
0
def embed(query_id, visualization_id, org_slug=None):
    """Render an embedded visualization page for the given query.

    Aborts with 404 when the visualization does not exist and with 400
    when the query has no cached results.
    """
    # TODO: add event for embed access
    query = models.Query.get_by_id_and_org(query_id, current_org)
    require_access(query.groups, current_user, view_only)
    vis = query.visualizations.where(models.Visualization.id == visualization_id).first()
    qr = {}

    if vis is not None:
        vis = vis.to_dict()
        qr = query.latest_query_data
        if qr is None:
            abort(400, message="No Results for this query")
        else:
            qr = qr.to_dict()
    else:
        abort(404, message="Visualization not found.")

    client_config = {}
    client_config.update(settings.COMMON_CLIENT_CONFIG)

    qr = project(qr, ('data', 'id', 'retrieved_at'))
    vis = project(vis, ('description', 'name', 'id', 'options', 'query', 'type', 'updated_at'))
    # Project the nested query dict, not the visualization dict itself —
    # projecting `vis` here put visualization fields under 'query'.
    # (Also dropped the duplicated 'name' key from the tuple.)
    vis['query'] = project(vis['query'], ('created_at', 'description', 'name', 'id', 'latest_query_data_id', 'updated_at'))

    return render_template("embed.html",
                           name=settings.NAME,
                           base_href=base_href(),
                           client_config=json_dumps(client_config),
                           visualization=json_dumps(vis),
                           query_result=json_dumps(qr),
                           analytics=settings.ANALYTICS)
Example #2
0
def buffer_logic(state):
    '''
    Buffer create/replace/reuse logic. The function name is not very good :(

       new_state    |   old_state   | same sources |     action
    ----------------|---------------|--------------|-----------------
        replace     |    replace    |    True      |  reuse buffer
        replace     |    replace    |    False     |  replace buffer
        replace     |   no-replace  |    True      |  create buffer (copy candidates)
        replace     |   no-replace  |    False     |  create buffer
       no-replace   |    replace    |    True      |  create buffer (copy candidates)
       no-replace   |    replace    |    False     |  create buffer
       no-replace   |   no-replace  |    True      |  reuse buffer
       no-replace   |   no-replace  |    False     |  create buffer

    A reusable buffer will be looked for, then a replacement buffer and as a
    last resort a new one will be created.

    Returns:
        old_state (dict): In case a state was reused/replaced it is returned
        because it will be needed later on to compare it with the current
        state and determine whether the window should be resized/moved, etc...
    '''
    # We are only interested in buffers which are in the same container.
    # That's where the interesting reuse/replace logic is at.
    states = fn.where(variables.states, container=state['container'])

    # Predicate bound to the incoming state: "does X list the same sources?"
    with_same_sources = partial(same_sources, state)

    # Reusable: an existing state with the same sources AND the same
    # replace-mode as the new state.
    reusable_state = fn.first(fn.where(
        ifilter(with_same_sources, states),
        replace = state['replace']
    ))

    # Replaceable: a replace-mode buffer whose sources differ.
    replaceable_state = fn.first(fn.where(
        ifilter(lambda x: not with_same_sources(x), states),
        replace = True
    ))

    old_state = None

    if reusable_state:
        # Adopt the old buffer wholesale: uid, vim buffer and sources.
        state.update(fn.project(reusable_state, ['uid', 'buffer', 'sources']))
        old_state = reusable_state
        variables.states.remove(reusable_state)

    elif replaceable_state:
        # Keep the old buffer object but refill it with fresh candidates.
        state.update(fn.project(replaceable_state, ['uid', 'buffer']))
        state['sources'] = populated_candidates(state)
        set_buffer_contents(state['buffer'], aggregate_candidates(state))
        old_state = replaceable_state
        variables.states.remove(replaceable_state)

    else:
        # Create a new buffer; if a same-sources state exists (necessarily
        # with a different replace-mode here) its candidates are copied.
        same = find(with_same_sources, states)
        state['sources'] = (same and same['sources']) or populated_candidates(state)
        state['buffer'] = make_pyunite_buffer(state)

    return old_state
Example #3
0
def embed(query_id, visualization_id, org_slug=None):
    """Render an embedded visualization page.

    If parameterized embeds are enabled and the request supplies parameter
    values, the query is re-executed synchronously with them; otherwise the
    latest cached result is used. Aborts 404 when the visualization is
    missing and 400 when no result can be produced.
    """
    query = models.Query.get_by_id_and_org(query_id, current_org)
    require_access(query.groups, current_user, view_only)
    vis = query.visualizations.where(models.Visualization.id == visualization_id).first()
    qr = {}

    parameter_values = collect_parameters_from_request(request.args)

    if vis is not None:
        vis = vis.to_dict()
        qr = query.latest_query_data
        # Removed leftover debug logging ("jonhere") and commented-out code;
        # use truthiness instead of `== True` / `len(...) > 0`.
        if settings.ALLOW_PARAMETERS_IN_EMBEDS and parameter_values:
            # Run the parameterized query.
            #
            # WARNING: Note that the external query parameters
            #          are a potential risk of SQL injections.
            #
            results = run_query_sync(query.data_source, parameter_values, query.query)
            if results is None:
                abort(400, message="Unable to get results for this query")
            else:
                qr = {"data": json.loads(results)}
        elif qr is None:
            abort(400, message="No Results for this query")
        else:
            qr = qr.to_dict()
    else:
        abort(404, message="Visualization not found.")

    record_event(current_org, current_user, {
        'action': 'view',
        'object_id': visualization_id,
        'object_type': 'visualization',
        'query_id': query_id,
        'embed': True,
        'referer': request.headers.get('Referer')
    })

    client_config = {}
    client_config.update(settings.COMMON_CLIENT_CONFIG)

    qr = project(qr, ('data', 'id', 'retrieved_at'))
    vis = project(vis, ('description', 'name', 'id', 'options', 'query', 'type', 'updated_at'))
    vis['query'] = project(vis['query'], ('created_at', 'description', 'name', 'id', 'latest_query_data_id', 'name', 'updated_at'))

    return render_template("embed.html",
                           client_config=json_dumps(client_config),
                           visualization=json_dumps(vis),
                           query_result=json_dumps(qr))
Example #4
0
    def get(self):
        """List data sources visible to the current user, sorted by name.

        Admins see every source in the org; other users only the sources
        shared with their groups. Each entry carries a `view_only` flag
        that is True when every group grants read-only access.
        """
        if self.current_user.has_permission('admin'):
            visible = models.DataSource.all(self.current_org)
        else:
            visible = models.DataSource.all(self.current_org,
                                            group_ids=self.current_user.group_ids)

        unique = {}
        for source in visible:
            if source.id in unique:
                continue

            try:
                serialized = source.to_dict()
                serialized['view_only'] = all(project(source.groups, self.current_user.group_ids).values())
            except AttributeError:
                # A broken source must not break the whole listing.
                logging.exception("Error with DataSource#to_dict (data source id: %d)", source.id)
            else:
                unique[source.id] = serialized

        self.record_event({
            'action': 'list',
            'object_id': 'admin/data_sources',
            'object_type': 'datasource',
        })

        return sorted(unique.values(), key=lambda d: d['name'].lower())
Example #5
0
 def test_edit(self):
     """POSTing new field values updates the snippet and echoes them back."""
     snippet = models.QuerySnippet(
         trigger='a',
         description='b',
         snippet='c',
         user=self.factory.user,
         org=self.factory.org
     )
     models.db.session.add(snippet)
     models.db.session.commit()

     res = self.make_request(
         'post',
         '/api/query_snippets/1',
         data={'trigger': 'x', 'description': 'y', 'snippet': 'z'},
         user=self.factory.user)

     expected = {
         'id': 1,
         'trigger': 'x',
         'description': 'y',
         'snippet': 'z',
     }
     self.assertEqual(
         project(res.json, ['id', 'trigger', 'description', 'snippet']),
         expected)
     self.assertEqual(snippet.trigger, 'x')
     self.assertEqual(snippet.description, 'y')
     self.assertEqual(snippet.snippet, 'z')
Example #6
0
 def test_list(self):
     """GET /api/query_snippets returns the user's snippets."""
     snippet = models.QuerySnippet(
         trigger='x',
         description='y',
         snippet='z',
         user=self.factory.user,
         org=self.factory.org
     )
     models.db.session.add(snippet)
     models.db.session.commit()

     res = self.make_request(
         'get',
         '/api/query_snippets',
         user=self.factory.user)
     self.assertEqual(res.status_code, 200)

     listed = res.json
     self.assertEqual(len(listed), 1)

     expected = {
         'id': 1,
         'trigger': 'x',
         'description': 'y',
         'snippet': 'z',
     }
     self.assertEqual(
         project(listed[0], ['id', 'trigger', 'description', 'snippet']),
         expected)
     self.assertEqual(snippet.trigger, 'x')
     self.assertEqual(snippet.description, 'y')
     self.assertEqual(snippet.snippet, 'z')
Example #7
0
def split_date(spec):
    """Expand a date-range spec into per-day schedule entries.

    A continuous spec is yielded once, projected to its relevant keys.
    Otherwise, one entry is yielded for every (day, schedule) pair where
    the day's weekday appears in the schedule's `days_of_week` (Monday=0).
    A spec without explicit schedules gets a default all-week schedule
    using the spec's own start/end times.
    """
    if spec['is_continuous']:
        yield project(spec, (
            'is_continuous',
            'start_date',
            'end_date',
            'start_time',
            'end_time',
        ))
        return

    first_day = parse_date(spec['start_date'])
    last_day = maybe_parse_date(spec['end_date']) or first_day

    total_days = int((last_day - first_day).total_seconds() / (60 * 60 * 24))
    schedules = spec['schedules'] or [{
        'days_of_week': [0, 1, 2, 3, 4, 5, 6],
        'start_time': spec['start_time'],
        'end_time': spec['end_time'],
    }]

    for offset in range(total_days + 1):
        current = first_day + timedelta(days=offset)
        weekday = current.isoweekday() - 1
        for schedule in schedules:
            if weekday not in schedule['days_of_week']:
                continue
            yield {
                'is_continuous': False,
                'start_date': current.isoformat(),
                'end_date': None,
                'start_time': schedule['start_time'],
                'end_time': schedule['end_time'],
            }
Example #8
0
def get_closure(func):
    # Resolve the global names a function actually references and return
    # them projected out of func.__globals__. For a class, the closures of
    # all its methods are merged.
    if isinstance(func, type):
        # NOTE(review): `meth.im_func` is Python 2 syntax while
        # `func.__code__` below is Python 3 — confirm the target version.
        methods = inspect.getmembers(func, predicate=inspect.ismethod)
        return join(get_closure(meth.im_func) for _, meth in methods) or {}

    # Walk the bytecode for referenced names, then pick those out of the
    # function's global namespace.
    code = Code.from_code(func.__code__)
    names = _code_names(code)
    return project(func.__globals__, names)
Example #9
0
async def get_list(request, query, type_=None, relations=None, page=1, page_size=20, fields=(), expand=()):
    """Run a paginated search and expand the requested relations.

    Returns a dict with the result count, the (possibly expanded) items,
    the search timing, and previous/next page URIs.
    """
    # Fixed: `relations={}` was a mutable default argument; treat None as
    # "no relations" instead.
    relations = {} if relations is None else relations

    items, count, took = await search(query, type_, page, page_size, fields)

    # Only expand the relations the caller explicitly asked for.
    expanding_relations = project(relations, expand)
    items = await expand_multiple_items(items, expanding_relations)

    previous = get_previous_page_uri(request, page, page_size)
    next_ = get_next_page_uri(request, page, page_size, count)

    return {"count": count, "items": items, "took": took, "previous": previous, "next": next_}
Example #10
0
    def to_dict(self, with_widgets=False, user=None):
        """Serialize the dashboard; optionally include its widgets.

        When `user` is given, widgets whose underlying query the user may
        not access are reduced to a restricted placement-only projection.
        """
        layout = json.loads(self.layout)

        if with_widgets:
            # Eager-load visualization/query/user in one query (left outer
            # joins, since text widgets have no visualization).
            widget_list = Widget.select(Widget, Visualization, Query, User)\
                .where(Widget.dashboard == self.id)\
                .join(Visualization, join_type=peewee.JOIN_LEFT_OUTER)\
                .join(Query, join_type=peewee.JOIN_LEFT_OUTER)\
                .join(User, join_type=peewee.JOIN_LEFT_OUTER)

            widgets = {}

            for w in widget_list:
                if w.visualization_id is None:
                    # Text-only widget: no access check needed.
                    widgets[w.id] = w.to_dict()
                elif user and has_access(w.visualization.query.groups, user, view_only):
                    widgets[w.id] = w.to_dict()
                else:
                    # Strip everything except placement metadata and mark
                    # the widget as restricted for the frontend.
                    widgets[w.id] = project(w.to_dict(),
                                            ('id', 'width', 'dashboard_id', 'options', 'created_at', 'updated_at'))
                    widgets[w.id]['restricted'] = True

            # The following is a workaround for cases when the widget object gets deleted without the dashboard layout
            # updated. This happens for users with old databases that didn't have a foreign key relationship between
            # visualizations and widgets.
            # It's temporary until better solution is implemented (we probably should move the position information
            # to the widget).
            widgets_layout = []
            for row in layout:
                new_row = []
                for widget_id in row:
                    widget = widgets.get(widget_id, None)
                    if widget:
                        new_row.append(widget)

                widgets_layout.append(new_row)

            # widgets_layout = map(lambda row: map(lambda widget_id: widgets.get(widget_id, None), row), layout)
        else:
            widgets_layout = None

        return {
            'id': self.id,
            'slug': self.slug,
            'name': self.name,
            'user_id': self.user_id,
            'layout': layout,
            'groups': self.groups,
            'dashboard_filters_enabled': self.dashboard_filters_enabled,
            'widgets': widgets_layout,
            'is_archived': self.is_archived,
            'updated_at': self.updated_at,
            'created_at': self.created_at
        }
Example #11
0
    def get(self):
        """Return every data source visible to the current user.

        Each serialized source carries a `view_only` flag: True when all
        of the user's groups grant only read access.
        """
        if self.current_user.has_permission('admin'):
            sources = models.DataSource.all(self.current_org)
        else:
            sources = models.DataSource.all(self.current_org, groups=self.current_user.groups)

        result = []
        for source in sources:
            entry = source.to_dict()
            entry['view_only'] = all(project(source.groups, self.current_user.groups).values())
            result.append(entry)

        return result
Example #12
0
def public_dashboard(dashboard):
    """Serialize a dashboard for public (unauthenticated) consumption.

    Exposes only a whitelist of dashboard fields and runs every widget
    through public_widget().
    """
    public = project(serialize_dashboard(dashboard, with_favorite_state=False), (
        'name', 'layout', 'dashboard_filters_enabled', 'updated_at',
        'created_at'
    ))

    widget_query = (models.Widget.query
                    .filter(models.Widget.dashboard_id == dashboard.id)
                    .outerjoin(models.Visualization)
                    .outerjoin(models.Query))

    public['widgets'] = list(map(public_widget, widget_query))
    return public
Example #13
0
    def post(self, user_id):
        """Update a user's profile (email/name/password/groups).

        Owners may edit themselves; group changes require admin. Password
        changes require the current password. Responds 403 on permission
        failures and 400 on bad email domains / integrity errors.
        """
        require_admin_or_owner(user_id)
        user = models.User.get_by_id_and_org(user_id, self.current_org)

        req = request.get_json(True)

        # Only whitelisted fields may be updated through this endpoint.
        params = project(req, ('email', 'name', 'password', 'old_password', 'groups'))

        if 'password' in params and 'old_password' not in params:
            abort(403, message="Must provide current password to update password.")

        if 'old_password' in params and not user.verify_password(params['old_password']):
            abort(403, message="Incorrect current password.")

        if 'password' in params:
            # Hash immediately and drop both password fields from the
            # generic update below.
            user.hash_password(params.pop('password'))
            params.pop('old_password')

        if 'groups' in params and not self.current_user.has_permission('admin'):
            abort(403, message="Must be admin to change groups membership.")

        if 'email' in params:
            _, domain = params['email'].split('@', 1)

            # Reject blacklisted domains (qq.com is special-cased here).
            if domain.lower() in blacklist or domain.lower() == 'qq.com':
                abort(400, message='Bad email address.')

        try:
            self.update_model(user, params)
            models.db.session.commit()

            # The user has updated their email or password. This should invalidate all _other_ sessions,
            # forcing them to log in again. Since we don't want to force _this_ session to have to go
            # through login again, we call `login_user` in order to update the session with the new identity details.
            login_user(user, remember=True)
        except IntegrityError as e:
            # NOTE(review): `e.message` exists only on Python 2 — confirm
            # the runtime version this file targets.
            if "email" in e.message:
                message = "Email already taken."
            else:
                message = "Error updating record"

            abort(400, message=message)

        self.record_event({
            'action': 'edit',
            'object_id': user.id,
            'object_type': 'user',
            'updated_fields': params.keys()
        })

        return user.to_dict(with_api_key=is_admin_or_owner(user_id))
Example #14
0
def embed(query_id, visualization_id, org_slug=None):
    """Render an embedded visualization from the query's cached result.

    Aborts 404 when the visualization does not exist and 400 when the
    query has no cached result; records a 'view' event on success.
    """
    query = models.Query.get_by_id_and_org(query_id, current_org)
    require_access(query.groups, current_user, view_only)
    vis = query.visualizations.where(models.Visualization.id == visualization_id).first()

    # Guard clauses: abort() raises, so the happy path stays flat.
    if vis is None:
        abort(404, message="Visualization not found.")

    vis = vis.to_dict()
    qr = query.latest_query_data
    if qr is None:
        abort(400, message="No Results for this query")
    qr = qr.to_dict()

    record_event(current_org, current_user, {
        'action': 'view',
        'object_id': visualization_id,
        'object_type': 'visualization',
        'query_id': query_id,
        'embed': True,
        'referer': request.headers.get('Referer')
    })

    client_config = {}
    client_config.update(settings.COMMON_CLIENT_CONFIG)

    qr = project(qr, ('data', 'id', 'retrieved_at'))
    vis = project(vis, ('description', 'name', 'id', 'options', 'query', 'type', 'updated_at'))
    vis['query'] = project(vis['query'], ('created_at', 'description', 'name', 'id', 'latest_query_data_id', 'name', 'updated_at'))

    return render_template("embed.html",
                           client_config=json_dumps(client_config),
                           visualization=json_dumps(vis),
                           query_result=json_dumps(qr))
Example #15
0
    def post(self, snippet_id):
        """Edit a query snippet; only admins or its owner may do so."""
        body = request.get_json(True)
        changes = project(body, ('trigger', 'description', 'snippet'))
        snippet = get_object_or_404(models.QuerySnippet.get_by_id_and_org, snippet_id, self.current_org)
        require_admin_or_owner(snippet.user.id)

        snippet.update_instance(**changes)

        self.record_event({
            'action': 'edit',
            'object_id': snippet.id,
            'object_type': 'query_snippet'
        })

        return snippet.to_dict()
Example #16
0
    def get(self):
        """List the user's data sources, de-duplicated and ordered by id."""
        user = self.current_user
        if user.has_permission('admin'):
            data_sources = models.DataSource.all(self.current_org)
        else:
            data_sources = models.DataSource.all(self.current_org, groups=user.groups)

        by_id = {}
        for ds in data_sources:
            if ds.id not in by_id:
                entry = ds.to_dict()
                # True when every group grants only read access.
                entry['view_only'] = all(project(ds.groups, user.groups).values())
                by_id[ds.id] = entry

        return sorted(by_id.values(), key=lambda entry: entry['id'])
Example #17
0
    def post(self, dashboard_slug):
        """Apply partial updates (name/layout/version) to a dashboard.

        Responds 409 when a version conflict is detected.
        """
        props = request.get_json(force=True)
        # TODO: either convert all requests to use slugs or ids
        dashboard = models.Dashboard.get_by_id_and_org(dashboard_slug, self.current_org)

        require_object_modify_permission(dashboard, self.current_user)

        changes = project(props, ('name', 'layout', 'version'))
        changes['changed_by'] = self.current_user

        try:
            dashboard.update_instance(**changes)
        except ConflictDetectedError:
            # Another writer updated the dashboard first.
            abort(409)

        return dashboard.to_dict(with_widgets=True, user=self.current_user)
Example #18
0
 def test_create(self):
     """Creating a snippet echoes its fields back and persists it."""
     res = self.make_request(
         'post',
         '/api/query_snippets',
         data={'trigger': 'x', 'description': 'y', 'snippet': 'z'},
         user=self.factory.user)

     expected = {
         'id': 1,
         'trigger': 'x',
         'description': 'y',
         'snippet': 'z',
     }
     self.assertEqual(
         project(res.json, ['id', 'trigger', 'description', 'snippet']),
         expected)

     stored = models.QuerySnippet.query.one()
     self.assertEqual(stored.trigger, 'x')
     self.assertEqual(stored.description, 'y')
     self.assertEqual(stored.snippet, 'z')
Example #19
0
    def post(self, alert_id):
        """Update an alert's editable fields (admin or owner only)."""
        body = request.get_json(True)
        changes = project(body, ('options', 'name', 'query_id', 'rearm'))
        alert = get_object_or_404(models.Alert.get_by_id_and_org, alert_id, self.current_org)
        require_admin_or_owner(alert.user.id)

        self.update_model(alert, changes)
        models.db.session.commit()

        self.record_event({
            'action': 'edit',
            'timestamp': int(time.time()),
            'object_id': alert.id,
            'object_type': 'alert'
        })

        return serialize_alert(alert)
Example #20
0
    def post(self, user_id):
        """Update a user record (email/name/password/groups).

        Owner-or-admin only; group membership changes require admin, and
        password changes require the current password.
        """
        require_admin_or_owner(user_id)
        user = models.User.get_by_id_and_org(user_id, self.current_org)

        req = request.get_json(True)

        # Only whitelisted fields may be updated.
        params = project(req, ('email', 'name', 'password', 'old_password', 'groups'))

        if 'password' in params and 'old_password' not in params:
            abort(403, message="Must provide current password to update password.")

        if 'old_password' in params and not user.verify_password(params['old_password']):
            abort(403, message="Incorrect current password.")

        if 'password' in params:
            # Hash now and strip both password fields from the bulk update.
            user.hash_password(params.pop('password'))
            params.pop('old_password')

        if 'groups' in params and not self.current_user.has_permission('admin'):
            abort(403, message="Must be admin to change groups membership.")
        
        if 'email' in params:
            _, domain = params['email'].split('@', 1)

            # Reject blacklisted domains (qq.com is special-cased here).
            if domain.lower() in blacklist or domain.lower() == 'qq.com':
                abort(400, message='Bad email address.')

        try:
            self.update_model(user, params)
            models.db.session.commit()
        except IntegrityError as e:
            # NOTE(review): `e.message` exists only on Python 2 — confirm.
            if "email" in e.message:
                message = "Email already taken."
            else:
                message = "Error updating record"

            abort(400, message=message)

        self.record_event({
            'action': 'edit',
            'object_id': user.id,
            'object_type': 'user',
            'updated_fields': params.keys()
        })

        return user.to_dict(with_api_key=is_admin_or_owner(user_id))
Example #21
0
    def post(self, alert_id):
        """Apply edits to an alert and record the change event."""
        updates = project(request.get_json(True), ('options', 'name', 'query_id'))
        alert = models.Alert.get_by_id(alert_id)

        # The ORM attribute is `query`, so the incoming `query_id` is renamed.
        if 'query_id' in updates:
            updates['query'] = updates.pop('query_id')

        alert.update_instance(**updates)

        record_event.delay({
            'user_id': self.current_user.id,
            'action': 'edit',
            'timestamp': int(time.time()),
            'object_id': alert.id,
            'object_type': 'alert'
        })

        return alert.to_dict()
Example #22
0
    def post(self, dashboard_slug):
        """
        Modifies a dashboard.

        :qparam string slug: Slug of dashboard to retrieve.

        Responds with the updated :ref:`dashboard <dashboard-response-label>`.

        :status 200: success
        :status 409: Version conflict -- dashboard modified since last read
        """
        dashboard_properties = request.get_json(force=True)
        # TODO: either convert all requests to use slugs or ids
        dashboard = models.Dashboard.get_by_id_and_org(dashboard_slug, self.current_org)

        require_object_modify_permission(dashboard, self.current_user)

        # Only whitelisted properties can be changed through this endpoint.
        updates = project(dashboard_properties, ('name', 'layout', 'version', 'tags',
                                                 'is_draft', 'dashboard_filters_enabled'))

        # SQLAlchemy handles the case where a concurrent transaction beats us
        # to the update. But we still have to make sure that we're not starting
        # out behind.
        if 'version' in updates and updates['version'] != dashboard.version:
            abort(409)

        updates['changed_by'] = self.current_user

        self.update_model(dashboard, updates)
        models.db.session.add(dashboard)
        try:
            models.db.session.commit()
        except StaleDataError:
            # Optimistic-locking failure: someone committed in between.
            abort(409)

        result = serialize_dashboard(dashboard, with_widgets=True, user=self.current_user)

        self.record_event({
            'action': 'edit',
            'object_id': dashboard.id,
            'object_type': 'dashboard',
        })

        return result
Example #23
0
def create_stage(cls, repo, path, external=False, **kwargs):
    """Build a `cls` stage rooted at `path`, validating paths and deps.

    The various check_* helpers raise when the dvcfile name, the stage
    paths, external outputs, or the dependency graph are invalid.
    """
    from dvc.dvcfile import check_dvc_filename

    # Resolve the working directory (defaults to CWD) and the dvcfile
    # path to absolute paths before any validation.
    wdir = os.path.abspath(kwargs.get("wdir", None) or os.curdir)
    path = os.path.abspath(path)
    check_dvc_filename(path)
    # NOTE(review): is_wdir presumably flags a user-supplied wdir for
    # better error reporting — confirm against check_stage_path.
    check_stage_path(repo, wdir, is_wdir=kwargs.get("wdir"))
    check_stage_path(repo, os.path.dirname(path))

    stage = loads_from(cls, repo, path, wdir, kwargs)
    fill_stage_outputs(stage, **kwargs)
    if not external:
        check_no_externals(stage)
    # Forward only the dependency-related kwargs.
    fill_stage_dependencies(stage,
                            **project(kwargs, ["deps", "erepo", "params"]))
    check_circular_dependency(stage)
    check_duplicated_arguments(stage)

    return stage
Example #24
0
    def get(self):
        """List distinct data sources for the current user, ordered by id."""
        if self.current_user.has_permission("admin"):
            visible = models.DataSource.all(self.current_org)
        else:
            visible = models.DataSource.all(self.current_org, groups=self.current_user.groups)

        seen = {}
        for source in visible:
            if source.id in seen:
                continue

            serialized = source.to_dict()
            serialized["view_only"] = all(project(source.groups, self.current_user.groups).values())
            seen[source.id] = serialized

        # Sorting by ID before returning makes it easier to set default source on front end -- ABD
        return sorted(seen.values(), key=itemgetter("id"))
Example #25
0
    def get(self):
        """List the user's data sources, skipping ones that fail to serialize."""
        if self.current_user.has_permission('admin'):
            data_sources = models.DataSource.all(self.current_org)
        else:
            data_sources = models.DataSource.all(self.current_org, groups=self.current_user.groups)

        by_id = {}
        for ds in data_sources:
            if ds.id in by_id:
                continue

            try:
                entry = ds.to_dict()
                entry['view_only'] = all(project(ds.groups, self.current_user.groups).values())
            except AttributeError:
                # A misconfigured source must not break the whole listing.
                logging.exception("Error with DataSource#to_dict (data source id: %d)", ds.id)
            else:
                by_id[ds.id] = entry

        return sorted(by_id.values(), key=lambda entry: entry['id'])
Example #26
0
    def post(self, user_id):
        """Update a user record (email/name/password/groups).

        Owner-or-admin only; group membership changes require admin, and
        password changes require the current password.
        """
        require_admin_or_owner(user_id)
        user = models.User.get_by_id(user_id)

        req = request.get_json(True)

        # Whitelist of updatable fields.
        params = project(req, ("email", "name", "password", "old_password", "groups"))

        if "password" in params and "old_password" not in params:
            abort(403, message="Must provide current password to update password.")

        if "old_password" in params and not user.verify_password(params["old_password"]):
            abort(403, message="Incorrect current password.")

        if "password" in params:
            # Hash now and strip both password fields from the bulk update.
            user.hash_password(params.pop("password"))
            params.pop("old_password")

        if "groups" in params and not self.current_user.has_permission("admin"):
            abort(403, message="Must be admin to change groups membership.")

        try:
            user.update_instance(**params)
        except IntegrityError as e:
            # NOTE(review): `e.message` exists only on Python 2 — confirm.
            if "email" in e.message:
                message = "Email already taken."
            else:
                message = "Error updating record"

            abort(400, message=message)

        record_event.delay(
            {
                "user_id": self.current_user.id,
                "action": "edit",
                "timestamp": int(time.time()),
                "object_id": user.id,
                "object_type": "user",
                "updated_fields": params.keys(),
            }
        )

        return user.to_dict(with_api_key=is_admin_or_owner(user_id))
Example #27
0
    def post(self, alert_id):
        """Edit an alert; only admins or the alert's owner may do so."""
        body = request.get_json(True)
        updates = project(body, ('options', 'name', 'query_id', 'rearm'))
        alert = get_object_or_404(models.Alert.get_by_id_and_org, alert_id, self.current_org)
        require_admin_or_owner(alert.user.id)

        # Map the API's `query_id` onto the model's `query` attribute.
        if 'query_id' in updates:
            updates['query'] = updates.pop('query_id')

        alert.update_instance(**updates)

        self.record_event({
            'action': 'edit',
            'timestamp': int(time.time()),
            'object_id': alert.id,
            'object_type': 'alert'
        })

        return alert.to_dict()
Example #28
0
    def post(self, alert_id):
        """Update an alert's options/name/query/rearm (admin or owner only)."""
        payload = request.get_json(True)
        changes = project(payload, ('options', 'name', 'query_id', 'rearm'))
        alert = get_object_or_404(models.Alert.get_by_id_and_org, alert_id, self.current_org)
        require_admin_or_owner(alert.user.id)

        # The model stores the relation under `query`, not `query_id`.
        if 'query_id' in changes:
            changes['query'] = changes.pop('query_id')

        alert.update_instance(**changes)

        self.record_event({
            'action': 'edit',
            'timestamp': int(time.time()),
            'object_id': alert.id,
            'object_type': 'alert'
        })

        return alert.to_dict()
Example #29
0
def serialize_dashboard(obj, with_widgets=False, user=None, with_favorite_state=True):
    """Serialize a dashboard model to a plain dict.

    When `with_widgets` is set, widgets the given `user` may not access
    are reduced to a restricted placement-only projection.
    """
    layout = json_loads(obj.layout)

    widgets = []

    if with_widgets:
        for w in obj.widgets:
            if w.visualization_id is None:
                # Text-only widget: no access check needed.
                widgets.append(serialize_widget(w))
            elif user and has_access(w.visualization.query_rel, user, view_only):
                widgets.append(serialize_widget(w))
            else:
                # Strip everything except placement metadata and flag it.
                widget = project(serialize_widget(w),
                                ('id', 'width', 'dashboard_id', 'options', 'created_at', 'updated_at'))
                widget['restricted'] = True
                widgets.append(widget)
    else:
        widgets = None

    d = {
        'id': obj.id,
        'slug': obj.slug,
        'name': obj.name,
        'user_id': obj.user_id,
        # TODO: we should properly load the users
        'user': obj.user.to_dict(),
        'layout': layout,
        'dashboard_filters_enabled': obj.dashboard_filters_enabled,
        'widgets': widgets,
        'is_archived': obj.is_archived,
        'is_draft': obj.is_draft,
        'tags': obj.tags or [],
        # TODO: bulk load favorites
        'updated_at': obj.updated_at,
        'created_at': obj.created_at,
        'version': obj.version
    }

    if with_favorite_state:
        d['is_favorite'] = models.Favorite.is_favorite(current_user.id, obj)

    return d
Example #30
0
    def _post_save(self, sender, instance, **kwargs):
        # post_save signal handler: invalidate caches for the saved object
        # and, when cache_on_save is enabled, write it straight to cache.
        # Invoke invalidations for both old and new versions of saved object
        old = _old_objs.pop((get_thread_id(), sender, instance.pk), None)
        if old:
            invalidate_obj(old)
        invalidate_obj(instance)

        # Enabled cache_on_save makes us write saved object to cache.
        # Later it can be retrieved with .get(<cache_on_save_field>=<value>)
        # <cache_on_save_field> is pk unless specified.
        # This sweet trick saves a db request and helps with slave lag.
        cache_on_save = instance._cacheprofile.get('cache_on_save')
        if cache_on_save:
            # HACK: We get this object "from field" so it can contain
            #       some undesirable attributes or other objects attached.
            #       RelatedField accessors do that, for example.
            #
            #       So we strip down any _*_cache attrs before saving
            #       and later reassign them
            # Stripping up undesirable attributes
            unwanted_attrs = [
                k for k in instance.__dict__
                if k.startswith('_') and k.endswith('_cache')
            ]
            unwanted_dict = project(instance.__dict__, unwanted_attrs)
            for k in unwanted_attrs:
                del instance.__dict__[k]

            key = 'pk' if cache_on_save is True else cache_on_save
            # Django doesn't allow filters like related_id = 1337.
            # So we just hacky strip _id from end of a key
            # TODO: make it right, _meta.get_field() should help
            filter_key = key[:-3] if key.endswith('_id') else key

            cond = {filter_key: getattr(instance, key)}
            qs = sender.objects.inplace().filter(**cond).order_by()
            if MAX_GET_RESULTS:
                # Mirror Django's .get() slicing so the cached result set
                # matches what a later .get() would fetch.
                qs = qs[:MAX_GET_RESULTS + 1]
            qs._cache_results(qs._cache_key(), [instance])

            # Reverting stripped attributes
            instance.__dict__.update(unwanted_dict)
def public_dashboard(dashboard):
    """Serialize a dashboard for public (unauthenticated) consumption.

    Only whitelisted dashboard fields are exposed; widgets are resolved
    per layout row, silently dropping ids with no matching widget.
    """
    result = project(dashboard.to_dict(), ('name', 'layout', 'dashboard_filters_enabled', 'updated_at', 'created_at'))

    widget_query = (models.Widget.query
                    .filter(models.Widget.dashboard_id == dashboard.id)
                    .outerjoin(models.Visualization)
                    .outerjoin(models.Query))
    widgets_by_id = {w.id: public_widget(w) for w in widget_query}

    result['widgets'] = [
        [widgets_by_id[wid] for wid in row if widgets_by_id.get(wid)]
        for row in result['layout']
    ]
    return result
Example #32
0
def public_dashboard(dashboard):
    """Serialize a dashboard and its widgets for public sharing (peewee)."""
    serialized = project(dashboard.to_dict(), ('name', 'layout', 'dashboard_filters_enabled', 'updated_at', 'created_at'))

    widget_query = models.Widget.select(models.Widget, models.Visualization, models.Query) \
        .where(models.Widget.dashboard == dashboard.id) \
        .join(models.Visualization, join_type=models.peewee.JOIN_LEFT_OUTER) \
        .join(models.Query, join_type=models.peewee.JOIN_LEFT_OUTER)
    widgets_by_id = {w.id: public_widget(w) for w in widget_query}

    # Rebuild the layout grid with serialized widgets, skipping unknown ids.
    serialized['widgets'] = [
        [widgets_by_id[wid] for wid in row if widgets_by_id.get(wid)]
        for row in serialized['layout']
    ]
    return serialized
Example #33
0
    def post(self, alert_id):
        """Update an alert's options, name, or associated query, and record
        an audit event. Returns the updated alert as a dict."""
        alert = models.Alert.get_by_id(alert_id)
        updates = project(request.get_json(True), ("options", "name", "query_id"))
        # The model field is `query`, while the API accepts `query_id`.
        if "query_id" in updates:
            updates["query"] = updates.pop("query_id")

        alert.update_instance(**updates)

        record_event.delay({
            "user_id": self.current_user.id,
            "action": "edit",
            "timestamp": int(time.time()),
            "object_id": alert.id,
            "object_type": "alert",
        })

        return alert.to_dict()
Example #34
0
    def get(self, data_source_id):
        """Return a single data source; serialization depth depends on the
        caller's permissions. Records a 'view' audit event."""
        data_source = get_object_or_404(models.DataSource.get_by_id_and_org,
                                        data_source_id, self.current_org)
        require_access(data_source, self.current_user, view_only)

        ds = {}
        if self.current_user.has_permission("list_data_sources"):
            # Full details only for admins; others get the limited form.
            ds = data_source.to_dict(
                all=self.current_user.has_permission("admin"))

        # view_only is True only when every group the user belongs to grants
        # read-only access to this data source (frontend permission hint).
        group_access = project(data_source.groups, self.current_user.group_ids)
        ds["view_only"] = all(group_access.values())

        self.record_event({
            "action": "view",
            "object_id": data_source_id,
            "object_type": "datasource"
        })
        return ds
Example #35
0
    def _read_env(self, out, checkpoint_func=None) -> Env:
        """Derive the environment variables implied by an output's dvclive
        or checkpoint configuration. Returns an empty mapping otherwise."""
        env: Env = {}
        if out.live:
            from dvc.env import DVCLIVE_HTML, DVCLIVE_PATH, DVCLIVE_SUMMARY
            from dvc.output import BaseOutput
            from dvc.schema import LIVE_PROPS

            env[DVCLIVE_PATH] = str(out.path_info)
            if isinstance(out.live, dict):
                config = project(out.live, LIVE_PROPS)
                # Both flags default to enabled; exported as "0"/"1" strings.
                summary = config.get(BaseOutput.PARAM_LIVE_SUMMARY, True)
                html = config.get(BaseOutput.PARAM_LIVE_HTML, True)
                env[DVCLIVE_SUMMARY] = str(int(summary))
                env[DVCLIVE_HTML] = str(int(html))
        elif out.checkpoint and checkpoint_func:
            from dvc.env import DVC_CHECKPOINT

            env[DVC_CHECKPOINT] = "1"
        return env
Example #36
0
 def test_create(self):
     """POSTing a new snippet persists it and echoes it back."""
     payload = {'trigger': 'x', 'description': 'y', 'snippet': 'z'}
     res = self.make_request('post',
                             '/api/query_snippets',
                             data=payload,
                             user=self.factory.user)
     expected = {'id': 1, 'trigger': 'x', 'description': 'y', 'snippet': 'z'}
     self.assertEqual(
         project(res.json, ['id', 'trigger', 'description', 'snippet']),
         expected)
     qs = models.QuerySnippet.query.one()
     self.assertEqual(qs.trigger, 'x')
     self.assertEqual(qs.description, 'y')
     self.assertEqual(qs.snippet, 'z')
Example #37
0
def loads_from(cls, repo, path, wdir, data):
    """Construct a stage object from parsed file data, keeping only the
    recognized stage keys from *data*."""
    picked = project(data, [
        Stage.PARAM_CMD,
        Stage.PARAM_LOCKED,
        Stage.PARAM_ALWAYS_CHANGED,
        Stage.PARAM_MD5,
        "name",
    ])
    return cls(repo=repo, path=path, wdir=wdir, **picked)
Example #38
0
    def post(self, user_id):
        """Update a user's profile fields.

        Password changes require the current password; group membership
        changes require admin. Records an audit event and returns the
        updated user dict (with API key for admins/owner).
        """
        require_admin_or_owner(user_id)
        user = models.User.get_by_id_and_org(user_id, self.current_org)

        req = request.get_json(True)
        params = project(req, ('email', 'name', 'password', 'old_password', 'groups'))

        changing_password = 'password' in params
        if changing_password and 'old_password' not in params:
            abort(403, message="Must provide current password to update password.")

        if 'old_password' in params and not user.verify_password(params['old_password']):
            abort(403, message="Incorrect current password.")

        if changing_password:
            user.hash_password(params.pop('password'))
            params.pop('old_password')

        if 'groups' in params and not self.current_user.has_permission('admin'):
            abort(403, message="Must be admin to change groups membership.")

        try:
            self.update_model(user, params)
            models.db.session.commit()
        except IntegrityError as e:
            # The unique constraint on email is the common failure here.
            if "email" in e.message:
                message = "Email already taken."
            else:
                message = "Error updating record"
            abort(400, message=message)

        self.record_event({
            'action': 'edit',
            'timestamp': int(time.time()),
            'object_id': user.id,
            'object_type': 'user',
            'updated_fields': params.keys()
        })

        return user.to_dict(with_api_key=is_admin_or_owner(user_id))
Example #39
0
    def post(self, dashboard_slug):
        """
        Modifies a dashboard.

        :qparam string slug: Slug of dashboard to retrieve.

        Responds with the updated :ref:`dashboard <dashboard-response-label>`.

        :status 200: success
        :status 409: Version conflict -- dashboard modified since last read
        """
        properties = request.get_json(force=True)
        # TODO: either convert all requests to use slugs or ids
        dashboard = models.Dashboard.get_by_id_and_org(dashboard_slug,
                                                       self.current_org)

        require_object_modify_permission(dashboard, self.current_user)

        updates = project(properties,
                          ('name', 'layout', 'version', 'tags', 'is_draft',
                           'dashboard_filters_enabled'))

        # Reject stale writes up front; SQLAlchemy's optimistic locking still
        # guards against a concurrent transaction beating us to the commit.
        if 'version' in updates and updates['version'] != dashboard.version:
            abort(409)

        updates['changed_by'] = self.current_user

        self.update_model(dashboard, updates)
        models.db.session.add(dashboard)
        try:
            models.db.session.commit()
        except StaleDataError:
            abort(409)

        return serialize_dashboard(dashboard,
                                   with_widgets=True,
                                   user=self.current_user)
Example #40
0
 def test_list(self):
     """GET /api/query_snippets returns the single stored snippet."""
     snippet = models.QuerySnippet(
         trigger="x",
         description="y",
         snippet="z",
         user=self.factory.user,
         org=self.factory.org,
     )
     models.db.session.add(snippet)
     models.db.session.commit()

     res = self.make_request("get", "/api/query_snippets", user=self.factory.user)
     self.assertEqual(res.status_code, 200)

     listed = res.json
     self.assertEqual(len(listed), 1)
     self.assertEqual(
         project(listed[0], ["id", "trigger", "description", "snippet"]),
         {"id": 1, "trigger": "x", "description": "y", "snippet": "z"},
     )
     self.assertEqual(snippet.trigger, "x")
     self.assertEqual(snippet.description, "y")
     self.assertEqual(snippet.snippet, "z")
Example #41
0
  def __init__(
    self, in_dim, out_dim, act="linear",
    W_regularizer=None, b_regularizer=None,
    bias=False, wrapped_input=False, **kwargs):
    """Wrap keras.layers.Dense, translating legacy arg names (in_dim,
    out_dim, act, bias, W_/b_regularizer) to Dense's own kwargs."""
    passthrough = fy.project(kwargs, [
      "units", "input_shape", "activation", "use_bias",
      "kernel_initializer", "bias_initializer",
      "kernel_regularizer", "bias_regularizer",
      "kernel_constraint", "bias_constraint"])

    # keras.layer.Dense arg names have precedence over their aliases:
    defaults = dict(
      units=out_dim, input_shape=(in_dim,),
      activation=act, use_bias=bias,
      kernel_regularizer=W_regularizer,
      bias_regularizer=b_regularizer)
    kwargs = fy.merge(defaults, passthrough)

    super().__init__(**kwargs)

    self.wrapped_input = wrapped_input
Example #42
0
    def get(self):
        """List data sources visible to the current user, sorted by id.

        Admins see all org data sources; others only those shared with
        their groups. Serialization failures are logged and skipped.
        """
        if self.current_user.has_permission('admin'):
            data_sources = models.DataSource.all(self.current_org)
        else:
            data_sources = models.DataSource.all(
                self.current_org,
                group_ids=self.current_user.group_ids)

        by_id = {}
        for ds in data_sources:
            if ds.id in by_id:
                continue

            try:
                serialized = ds.to_dict()
                # view_only only when every relevant group grants read-only.
                group_access = project(ds.groups, self.current_user.group_ids)
                serialized['view_only'] = all(group_access.values())
                by_id[ds.id] = serialized
            except AttributeError:
                logging.exception("Error with DataSource#to_dict (data source id: %d)", ds.id)

        return sorted(by_id.values(), key=lambda d: d['id'])
Example #43
0
def _get_full_context(cwd: pathlib.Path) -> dict:
    """Load the repo's stored cookiecutter context, prompting for any keys
    added to the project template since the repo was created."""
    context_path = cwd.joinpath(CONTEXT_FILE_NAME)
    if not context_path.exists():
        raise FileNotFoundError(
            f'Could not find \'{CONTEXT_FILE_NAME}\', are you in a ballet '
            'project repo?')
    with context_path.open('r') as f:
        context = json.load(f)

    # find out if there are any new keys to prompt for
    with PROJECT_CONTEXT_PATH.open('r') as f:
        new_context = json.load(f)
    new_keys = set(new_context) - set(context['cookiecutter'])
    if new_keys:
        pending = {'cookiecutter': funcy.project(new_context, new_keys)}
        context['cookiecutter'].update(prompt_for_config(pending))

    return context['cookiecutter']
Example #44
0
def public_dashboard(dashboard):
    """Build the public (shared-link) representation of a dashboard."""
    exposed_fields = ('name', 'layout', 'dashboard_filters_enabled',
                      'updated_at', 'created_at')
    result = project(dashboard.to_dict(), exposed_fields)

    widget_list = models.Widget.select(models.Widget, models.Visualization, models.Query) \
        .where(models.Widget.dashboard == dashboard.id) \
        .join(models.Visualization, join_type=models.peewee.JOIN_LEFT_OUTER) \
        .join(models.Query, join_type=models.peewee.JOIN_LEFT_OUTER)
    widgets = {w.id: public_widget(w) for w in widget_list}

    # Replace widget ids in the layout grid with serialized widgets,
    # dropping any id that no longer resolves.
    layout_rows = []
    for row in result['layout']:
        layout_rows.append([widgets[wid] for wid in row if widgets.get(wid)])

    result['widgets'] = layout_rows
    return result
Example #45
0
    def get(self):
        """List dashboards the current user can manage, sorted by name.

        Records a 'list' audit event. Serialization failures for a single
        board are logged and the board is skipped.
        """
        # The original admin/non-admin branches built the exact same query,
        # and left several debug print() calls behind; both are removed.
        manage_boards = models.Dashboard.all(
            self.current_org,
            group_ids=self.current_user.group_ids,
            user_id=self.current_user.id)

        response = {}
        for board in manage_boards:
            if board.id in response:
                continue

            try:
                d = board.to_dict()
                # NOTE(review): project({}, ...) always yields an empty dict,
                # so all() over its values is vacuously True -- view_only is
                # always True here. Looks like a stub; confirm the intended
                # group mapping before relying on this flag.
                d['view_only'] = all(
                    project({}, self.current_user.group_ids).values())
                response[board.id] = d
            except AttributeError:
                logging.exception(
                    "Error with ManageBoards#to_dict (manage board id: %d)",
                    board.id)

        self.record_event({
            'action': 'list',
            'object_id': 'admin/manage_boards',
            'object_type': 'manageboard',
        })

        return sorted(response.values(), key=lambda d: d['name'].lower())
Example #46
0
 def test_edit(self):
     """POSTing to an existing snippet updates all of its fields."""
     snippet = models.QuerySnippet(
         trigger="a",
         description="b",
         snippet="c",
         user=self.factory.user,
         org=self.factory.org,
     )
     models.db.session.add(snippet)
     models.db.session.commit()

     res = self.make_request(
         "post",
         "/api/query_snippets/1",
         data={"trigger": "x", "description": "y", "snippet": "z"},
         user=self.factory.user,
     )
     expected = {"id": 1, "trigger": "x", "description": "y", "snippet": "z"}
     self.assertEqual(
         project(res.json, ["id", "trigger", "description", "snippet"]),
         expected)
     self.assertEqual(snippet.trigger, "x")
     self.assertEqual(snippet.description, "y")
     self.assertEqual(snippet.snippet, "z")
Example #47
0
async def get_list(request,
                   query,
                   type_=None,
                   relations=None,
                   page=1,
                   page_size=20,
                   fields=(),
                   expand=()):
    """Run a paginated search and return items with pagination metadata.

    Only the relations named in *expand* are resolved on the result items.
    Fix: `relations` previously defaulted to a shared mutable dict `{}`;
    `None` is now used as the sentinel (backward-compatible — an empty
    mapping behaves identically).
    """
    if relations is None:
        relations = {}

    items, count, took = await search(query, type_, page, page_size, fields)

    expanding_relations = project(relations, expand)
    items = await expand_multiple_items(items, expanding_relations)

    previous = get_previous_page_uri(request, page, page_size)
    next_ = get_next_page_uri(request, page, page_size, count)

    return {
        'count': count,
        'items': items,
        'took': took,
        'previous': previous,
        'next': next_,
    }
Example #48
0
def load_from_pipeline(stage, s_list, typ="outs"):
    """Yield output objects parsed from a pipeline-file section
    (`outs`, `metrics`, or `plots`)."""
    if typ not in (stage.PARAM_OUTS, stage.PARAM_METRICS, stage.PARAM_PLOTS):
        raise ValueError(f"'{typ}' key is not allowed for pipeline files.")

    is_metric = typ == stage.PARAM_METRICS
    is_plot = typ == stage.PARAM_PLOTS

    merged = _merge_data(s_list)

    for path, flags in merged.items():
        plot_props = {}
        if is_plot:
            from dvc.schema import PLOT_PROPS

            # Plot-specific properties are split off from the generic flags.
            plot_props, flags = _split_dict(flags, keys=PLOT_PROPS.keys())
        extra = project(flags, [
            BaseOutput.PARAM_CACHE,
            BaseOutput.PARAM_PERSIST,
            BaseOutput.PARAM_CHECKPOINT,
        ])
        yield _get(stage, path, {}, plot=plot_props or is_plot,
                   metric=is_metric, **extra)
Example #49
0
 def test_list(self):
     """Listing snippets returns the stored snippet with expected fields."""
     snippet = models.QuerySnippet(trigger='x',
                                   description='y',
                                   snippet='z',
                                   user=self.factory.user,
                                   org=self.factory.org)
     models.db.session.add(snippet)
     models.db.session.commit()

     res = self.make_request('get',
                             '/api/query_snippets',
                             user=self.factory.user)
     self.assertEqual(res.status_code, 200)
     listed = res.json
     self.assertEqual(len(listed), 1)
     self.assertEqual(
         project(listed[0], ['id', 'trigger', 'description', 'snippet']),
         {'id': 1, 'trigger': 'x', 'description': 'y', 'snippet': 'z'})
     self.assertEqual(snippet.trigger, 'x')
     self.assertEqual(snippet.description, 'y')
     self.assertEqual(snippet.snippet, 'z')
Example #50
0
def embedjon(query_id, org_slug=None):
    """Return a query's results as JSON for embedding.

    Runs a parameterized query when embeds permit it and parameters were
    supplied; otherwise serves the latest cached result. Records an
    'embedjon' audit event.

    Fixes: removed the unused local `visualizations`; replaced the
    non-idiomatic `== True` / `len(...) > 0` comparisons with truthiness
    (assumes ALLOW_PARAMETERS_IN_EMBEDS is a boolean flag -- confirm).
    """
    query = models.Query.get_by_id_and_org(query_id, current_org)
    require_access(query.groups, current_user, view_only)

    parameter_values = collect_parameters_from_request(request.args)
    qr = query.latest_query_data
    if settings.ALLOW_PARAMETERS_IN_EMBEDS and parameter_values:
        # run parameterized query
        #
        # WARNING: Note that the external query parameters
        #          are a potential risk of SQL injections.
        #
        results = run_query_sync(query.data_source, parameter_values, query.query)
        if results is None:
            abort(400, message="Unable to get results for this query")
        qr = {"data": json.loads(results)}
    elif qr is None:
        abort(400, message="No Results for this query")
    else:
        qr = qr.to_dict()

    record_event(current_org, current_user, {
        'action': 'embedjon',
        'query_id': query_id,
        'embed': True,
        'referer': request.headers.get('Referer')
    })

    client_config = {}
    client_config.update(settings.COMMON_CLIENT_CONFIG)

    qr = project(qr, ('data', 'id', 'retrieved_at'))
    return json_dumps(qr)
Example #51
0
 def test_create(self):
     """Creating a snippet via the API stores it and returns it with an id."""
     payload = {"trigger": "x", "description": "y", "snippet": "z"}
     res = self.make_request(
         "post",
         "/api/query_snippets",
         data=payload,
         user=self.factory.user,
     )
     returned = project(res.json, ["id", "trigger", "description", "snippet"])
     self.assertEqual(returned, dict(payload, id=1))

     qs = models.QuerySnippet.query.one()
     self.assertEqual(qs.trigger, "x")
     self.assertEqual(qs.description, "y")
     self.assertEqual(qs.snippet, "z")
Example #52
0
 def filter_unseen_vocab(self, vocab):
     """Restrict both embedding maps to the words present in *vocab*.

     Fix: ndarray.tostring() is deprecated (removed in NumPy 2.0);
     tobytes() returns the identical raw byte string used as the
     vec2word key, so behavior is unchanged.
     """
     self.word2vec = project(self.word2vec, vocab)
     seen_keys = [self.word2vec[word].tobytes() for word in vocab]
     self.vec2word = project(self.vec2word, seen_keys)
Example #53
0
def _split_dict(d, keys):
    """Partition *d* into two dicts: entries whose key is in *keys*, and the rest."""
    picked = project(d, keys)
    rest = project(d, d.keys() - keys)
    return picked, rest
Example #54
0
    def sync(self):
        """Sync outbound activities from the saved start date up to now,
        one hourly window at a time.

        Writes the stream schema, pages through
        readRecentOutboundActivities for each window, derives a synthetic
        md5 `id` per record (the API exposes no stable primary key),
        emits the records via singer, and checkpoints state after every
        window so an interrupted run resumes at the last completed hour.
        """
        key_properties = self.catalog.get('key_properties')
        table = self.TABLE

        singer.write_schema(self.catalog.get('stream'),
                            self.catalog.get('schema'),
                            key_properties=key_properties)

        start = self.get_start_date(table)
        end = start
        interval = timedelta(hours=1)

        LOGGER.info('Syncing outbound activities.')

        while end < datetime.now(pytz.utc):
            # Re-login before each window -- presumably the SOAP session can
            # expire between windows; confirm against the client docs.
            self.login()
            start = end
            end = start + interval
            LOGGER.info("Fetching activities from {} to {}".format(start, end))

            _filter = self.make_filter(start, end)
            field_selector = get_field_selector(self.catalog.get('schema'))

            hasMore = True

            while hasMore:
                try:
                    results = \
                        self.client.service.readRecentOutboundActivities(
                            _filter)
                except suds.WebFault as e:
                    # Fault code 116 is treated as "no more data" for this
                    # window; anything else is re-raised.
                    if '116' in e.fault.faultstring:
                        hasMore = False
                        break
                    else:
                        raise

                result_dicts = [
                    suds.sudsobject.asdict(result) for result in results
                ]

                parsed_results = [
                    field_selector(result) for result in result_dicts
                ]

                for result in parsed_results:
                    # Build a deterministic id by hashing the identifying
                    # fields that are present (identity drops falsy values).
                    ids = [
                        'createdDate', 'activityType', 'contactId', 'listId',
                        'segmentId', 'keywordId', 'messageId'
                    ]

                    result['id'] = hashlib.md5('|'.join(
                        filter(
                            identity,
                            project(
                                result,
                                ids).values())).encode('utf-8')).hexdigest()

                singer.write_records(table, parsed_results)

                LOGGER.info('... {} results'.format(len(results)))

                # Subsequent reads page forward through the same window.
                _filter.readDirection = 'NEXT'

                if len(results) == 0:
                    hasMore = False

            # Checkpoint after each hourly window so a crash resumes here.
            self.state = incorporate(self.state, table, 'createdDate',
                                     start.replace(microsecond=0).isoformat())

            save_state(self.state)

        LOGGER.info('Done syncing outbound activities.')
def run_cv_features(event_info,
                    mapping_select,
                    results_dir,
                    cv,
                    return_models=False):
    # the dao object is created internally

    global event_corpus
    event_corpus, event_dates, event_art_num = event_info

    assert (os.path.isdir(results_dir))

    # Loading the CV periods here
    fname = "/research/home/rakesh/MyCode/WF_periods" + ".csv"  #  str(cv) +
    cv_periods = pd.read_csv(fname, index_col=0, header=0, parse_dates=True)

    # print cv_periods

    # Loading the returns that will be used across all CV periods
    tf = '/home/ryan/Dev/research-analysis/rgreen/amer-indicator-sets/packages/amer_v1/earnings/post-earnings/post1_earnings.csv'
    ret_data = load_returns_file(tf)

    global event_keys
    event_keys = event_corpus.keys(
    )  # indexed event keys referring to the entire event-article corpus

    model_results = [
    ]  # this is the combination of all CV models results and will be returned

    global tfidf_df
    global tfidf_index_list

    global eventkeys_to_gicsIG
    global IG
    global event_keys_intIG

    # create the event key to gics map here - make it global
    gvkey_to_gicsIG = ret_data[['gvkey', 'gics_industry_group']]
    gvkey_to_gicsIG = gvkey_to_gicsIG.drop_duplicates()
    gvkey_to_eventID = event_dates[['gvkey', 'gvkey + event_date']]
    gvkey_to_eventID = gvkey_to_eventID.drop_duplicates()

    eventkeys_to_gicsIG = pd.merge(gvkey_to_gicsIG,
                                   gvkey_to_eventID,
                                   on='gvkey',
                                   how='inner')
    eventkeys_to_gicsIG = eventkeys_to_gicsIG.drop_duplicates(
        'gvkey + event_date')
    IG = eventkeys_to_gicsIG['gics_industry_group'].unique(
    )  # list of industry group codes

    event_keys_intIG = intersection(
        event_keys, eventkeys_to_gicsIG['gvkey + event_date'].tolist())
    event_keys_intIG = set(event_keys_intIG)

    i = cv
    print cv_periods.at[i, 'train1-start']
    print cv_periods.at[i, 'train1-end']

    tloop = time.time()
    ##### Step 1: read event corpus dict and index according to date - to be able to split for cv purposes
    # selecting rows in event_dates ['dates'] column

    mask = (event_dates['date'] > cv_periods.at[i, 'train1-start']) & (
        event_dates['date'] < cv_periods.at[i, 'train1-end'])
    # mask = (event_dates['date'] > cv_periods.at[i, 'cv-start']) & (event_dates['date'] < cv_periods.at[i, 'cv-end'])
    # mask = (event_dates['date'] > cv_periods.at[i, 'train1-start']) & (event_dates['date'] < cv_periods.at[i, 'cv-end'])
    event_dates_select = event_dates.loc[mask]
    event_keys_select = event_dates_select['gvkey + event_date'].tolist(
    )  # event keys for the desired time frame

    # determine the event keys subset by comparing to event_keys
    event_keys_use = intersection(
        event_keys_select, event_keys)  # event_keys_use = event_keys_use[:10]
    event_corpus_use = project(event_corpus, event_keys_use)

    print "Event keys selected"

    event_corpus_use_list = []
    for ring in event_keys_use:
        event_corpus_use_list.append(event_corpus_use[ring])
    # event_corpus_use_list = event_corpus_use_list[:10]   # use only info from 10 events - as an example

    ##### Step 2: call feature_select_cv(event corpus (not in this cv period) and associated returns) to obtain the features
    event_keys_use_df = pd.DataFrame(event_keys_use,
                                     columns=['gvkey + event_date'])
    returns_cv_df = pd.merge(
        event_keys_use_df,
        event_dates,
        how='inner',
        on=['gvkey + event_date'],
        sort=False,  # left_index=True,
        suffixes=('_x', '_y'),
        copy=True,
        indicator=False)

    ##### Create Positive and Negative article lists
    returns_thresh = 0.0
    pos_corpus_use_list, neg_corpus_use_list, pos_keys, neg_keys = posneg_articles_split(
        event_corpus_use, returns_cv_df, returns_thresh)

    # # split the pos and negative event list and send it to two_word_count here to save the two word count for this particular CV period
    print "\nRunning Two Word Counter for CV fold", i
    full_vocab_list = TW_counts.main_caller(pos_corpus_use_list,
                                            neg_corpus_use_list,
                                            event_corpus_use_list, pos_keys,
                                            neg_keys, results_dir, i)

    # run the BNS feature selection function here - this could be in complete_cv_two itself
    Npos = len(pos_corpus_use_list)
    Nneg = len(neg_corpus_use_list)
    two_word_features = BNS_feature_select(results_dir, i, Npos, Nneg)

    feature_names = ['feature_' + x for x in two_word_features]
    print "Total no. of features (vocab words) considered are", len(
        feature_names)

    #### Step 3 build tfidf on event corpus (not in this cv period) w/ above features
    print "\nGenerating CV and Tfidf transformer"
    # count_vectorizer, tfidf_transformer, tfidf_df = tfidf_transform_gen(event_keys_use, event_corpus_use_list, two_word_features, full_vocab_list)
    count_vectorizer, tfidf_transformer_dict, tfidf_df = tfidf_transform_gen(
        event_keys_use, event_corpus_use_list, two_word_features,
        full_vocab_list)
    tfidf_index_list = tfidf_df.index.tolist()
    print len(tfidf_index_list), len(set(tfidf_index_list))
    del event_corpus_use, event_corpus_use_list

    ##### Step 4 score the (event corpus - complete set) and Step 5 is to generate the dao object - which can be fed below
    # Up to here only the events from the other CV folds are used - now w/ the above features the scores for all articles are calculated

    # mask = (mapping_select['date'] >= cv_periods.at[i, 'train1-start']) & (mapping_select['date'] <= cv_periods.at[i, 'train1-end'])
    mask = (mapping_select['date'] <= cv_periods.at[i, 'cv-end'])
    temp = mapping_select.loc[mask]
    # temp = mapping_select
    temp = temp.drop_duplicates(subset=['gvkey', 'event_date'])
    keys = temp.to_dict(
        orient='record')  # this is the df converted to a list! but why?

    # calculating tfidf scores for each event
    tstart = time.time()
    print "\nGenerating BoW scores for NB input"
    pool = mp.Pool(
        processes=20
    )  # if the whole loop gets parallelized - I might not want to sub parallelize...
    # inps = [(x, tfidf_transformer, event_art_num, two_word_features, full_vocab_list, feature_names) for x in keys]
    inps = [(x, tfidf_transformer_dict, event_art_num, two_word_features,
             full_vocab_list, feature_names) for x in keys]
    results = pool.map(tfidf_transform_calc, inps)
    bow_df = pd.DataFrame(results)
    bow_df['date'] = pd.to_datetime(bow_df['date'].values).date

    bow_df = bow_df.dropna(axis=0, how='any')

    print "Tfidf features fit in %0.1f secs" % (time.time() - tstart)

    ##### Step 5: Generating the dao object for this cv period - which will be used only once
    # so there will be a different dao object for each cv period - whereas there was one dao in the approach before

    # combine w/ returns to form dao object
    print "Merging with Returns File (Loaded above)"
    tstart = time.time()

    # Merge the BoW df with the returns using gvkey and date
    bow_df_merged = bow_df.merge(ret_data, on=['gvkey', 'date'], how='left')

    print "Loaded and merged returns file in %0.1f secs" % (time.time() -
                                                            tstart)

    ## saving the combined returns and bow df - to run from here if needed
    outname = os.path.join(results_dir, 'bow_tfidf_returns_' + str(i) + '.csv')
    bow_df_merged.to_csv(outname)

    del bow_df_merged, bow_df

    # # workin with new labels but old bow_df_merged
    # new_labels = ['gvkey', 'date', 'fwd_xmkt_projnorm_sec_0_10', 'fwd_xmkt_projnorm_sec_0_1', 'fwd_xmkt_0_10', 'fwd_xmkt_0_1']
    # tf = os.path.join(results_dir, 'bow_tfidf_returns_' + str(i) + '.csv')
    # bow_df_merged = pd.read_csv(tf, index_col=0)
    # bow_df_merged['date'] = pd.to_datetime(bow_df_merged['date'].values).date
    #
    # bow_df_merged = bow_df_merged.drop(new_labels[2:], axis=1)
    #
    # bow_df_merged2 = bow_df_merged.merge(ret_data[new_labels], on = ['gvkey', 'date'], how = 'left')
    # bow_df_merged2.to_csv(tf)
    # print bow_df_merged2.shape

    # ##### normal operation from here

    ## loading the saved bow and return df
    tf = os.path.join(results_dir, 'bow_tfidf_returns_' + str(i) + '.csv')
    dao = da.TrainingDataAccess(tf)

    ## This dao object is ready - need to add features to the config

    features = []
    for c in feature_names:
        if 'feature_' in c:
            features.append(c)

    print "Total no. of features (vocab words) considered are", len(features)

    config = config_set(features)
    print config.config_dict['run_params']['ycol_train']
    print config.config_dict['run_params']['ycol_eval']
    print config.config_dict['run_params']['binary_threshold']
    print config.config_dict['run_params']['filters']
    print config.config_dict['model']['class_name']

    ##### Step 6 Call the model building and eval functions - from fitting.py
    if (cv_periods.at[i, 'train2-start'] == "None"):  #%Y-%m-%d
        tup1 = (datetime.strptime(cv_periods.at[i, 'cv-start'],
                                  "%Y-%m-%d").date(),
                datetime.strptime(cv_periods.at[i, 'cv-end'],
                                  "%Y-%m-%d").date())
        tup2 = (datetime.strptime(cv_periods.at[i, 'train1-start'],
                                  "%Y-%m-%d").date(),
                datetime.strptime(cv_periods.at[i, 'train1-end'],
                                  "%Y-%m-%d").date())
        # tup2 = tup1
        # tup2 = (datetime.strptime(cv_periods.at[i, 'train1-start'], "%Y-%m-%d").date(), datetime.strptime(cv_periods.at[i, 'train1-end'], "%Y-%m-%d").date())

        arg = (dao, config, tup1, tup2, None
               )  #cv_periods.at[i, 'train2-start']

    else:
        tup1 = (datetime.strptime(cv_periods.at[i, 'cv-start'],
                                  "%Y-%m-%d").date(),
                datetime.strptime(cv_periods.at[i, 'cv-end'],
                                  "%Y-%m-%d").date())
        tup2 = (datetime.strptime(cv_periods.at[i, 'train1-start'],
                                  "%Y-%m-%d").date(),
                datetime.strptime(cv_periods.at[i, 'train1-end'],
                                  "%Y-%m-%d").date())
        tup3 = (datetime.strptime(cv_periods.at[i, 'train2-start'],
                                  "%Y-%m-%d").date(),
                datetime.strptime(cv_periods.at[i, 'train2-end'],
                                  "%Y-%m-%d").date())

        arg = (dao, config, tup1, tup2, tup3)

    tstart = time.time()

    print "Started NB Model building and prediction"
    model_results.append(
        fitting.train_worker_cv(arg)
    )  # can parallelize this separately by keeping all the dao and config objects created and sending them out!
    print "Completed NB in %0.1f secs" % (time.time() - tstart)

    print "Loop %d completed in %0.1f secs" % (i, time.time() - tloop)

    print "Out of the loop"

    icr_list = []
    icr_list_tr = []
    model_dict = {}
    for result in model_results:
        icr_list.append(result[0])
        icr_list_tr.append(result[2])
        cv_start_date = result[0]['cv-start-date'].values[0]
        cv_end_date = result[0]['cv-end-date'].values[0]
        model = result[1]  # this is an NB model
        datestr = cv_end_date.strftime("%Y%m%d")
        model_dict[(cv_start_date, cv_end_date)] = model
        model.persist(results_dir, datestr)

    results_df = pd.concat(icr_list)
    results_df = results_df.sort_values(by='date')
    assert not results_df.duplicated(subset=['gvkey', 'date']).any()
    # results_df.to_csv(os.path.join(results_dir, 'IC_results.csv'))
    cv_periods.to_csv(os.path.join(results_dir, 'cv_periods.csv'))

    # results_dir = '/research/home/rakesh/results/WalkForward/new_WF/TW_FS_PRL/check_day0_10rets2'
    outname = 'results_raw' + str(i) + '.csv'
    fname = os.path.join(results_dir, outname)
    results_df.to_csv(fname)

    # saving training results
    results_df2 = pd.concat(icr_list_tr)
    results_df2 = results_df2.sort_values(by='date')
    assert not results_df2.duplicated(subset=['gvkey', 'date']).any()

    outname = 'results_train' + str(i) + '.csv'
    fname = os.path.join(results_dir, outname)
    results_df2.to_csv(fname)

    # save config file for each run!
    outname = 'config_selected' + '.pkl'
    fname = os.path.join(results_dir, outname)
    output = open(fname, 'wb')
    pickle.dump(config, output)
    output.close()

    # remove all the pkls
    os.remove(os.path.join(results_dir, 'negcounts_list_' + str(i) + '.pkl'))
    os.remove(os.path.join(results_dir, 'poscounts_list_' + str(i) + '.pkl'))
    os.remove(os.path.join(results_dir, 'negcounts_' + str(i) + '.pkl'))
    os.remove(os.path.join(results_dir, 'poscounts_' + str(i) + '.pkl'))
    # os.remove(os.path.join(results_dir, 'negfilter_list_' + str(i) + '.pkl'))
    # os.remove(os.path.join(results_dir, 'posfilter_list_' + str(i) + '.pkl'))

    # results_df = []
    if return_models:
        return results_df, model_dict
    else:
        return results_df
Example #56
0
    def post(self, user_id):
        """Update an existing user's mutable fields (email, name, password,
        group memberships).

        Access is gated by ``require_admin_or_owner(user_id)``.  Returns the
        updated user as a dict; the API key is included only when the caller
        is an admin or the owner (``is_admin_or_owner``).
        """
        require_admin_or_owner(user_id)
        user = models.User.get_by_id_and_org(user_id, self.current_org)

        req = request.get_json(True)

        # Whitelist the updatable fields; any other keys in the payload are
        # silently dropped.
        params = project(
            req, ('email', 'name', 'password', 'old_password', 'group_ids'))

        # A password change requires proving knowledge of the current password.
        if 'password' in params and 'old_password' not in params:
            abort(403,
                  message="Must provide current password to update password.")

        if 'old_password' in params and not user.verify_password(
                params['old_password']):
            abort(403, message="Incorrect current password.")

        if 'password' in params:
            # Hash immediately and pop both plaintext values so they never
            # reach update_model() or the audit event recorded below.
            user.hash_password(params.pop('password'))
            params.pop('old_password')

        if 'group_ids' in params:
            # Only admins may change group membership; every supplied id must
            # resolve to a group inside the caller's org.
            if not self.current_user.has_permission('admin'):
                abort(403,
                      message="Must be admin to change groups membership.")

            for group_id in params['group_ids']:
                try:
                    models.Group.get_by_id_and_org(group_id, self.current_org)
                except NoResultFound:
                    abort(400,
                          message="Group id {} is invalid.".format(group_id))

            # An empty list would wipe all memberships — treat it as "no
            # change" by removing the key before update_model().
            if len(params['group_ids']) == 0:
                params.pop('group_ids')

        if 'email' in params:
            _, domain = params['email'].split('@', 1)

            # Reject blacklisted mail domains; 'qq.com' is hard-coded on top
            # of the configured blacklist.
            if domain.lower() in blacklist or domain.lower() == 'qq.com':
                abort(400, message='Bad email address.')

        # A changed address needs re-verification, but only when an email
        # server is actually configured to send the verification mail.
        email_address_changed = 'email' in params and params[
            'email'] != user.email
        needs_to_verify_email = email_address_changed and settings.email_server_is_configured(
        )
        if needs_to_verify_email:
            user.is_email_verified = False

        try:
            self.update_model(user, params)
            models.db.session.commit()

            if needs_to_verify_email:
                send_verify_email(user, self.current_org)

            # The user has updated their email or password. This should invalidate all _other_ sessions,
            # forcing them to log in again. Since we don't want to force _this_ session to have to go
            # through login again, we call `login_user` in order to update the session with the new identity details.
            if current_user.id == user.id:
                login_user(user, remember=True)
        except IntegrityError as e:
            # NOTE(review): `e.message` exists only on Python 2 exceptions —
            # confirm the runtime, or use str(e) when porting to Python 3.
            if "email" in e.message:
                message = "Email already taken."
            else:
                message = "Error updating record"

            abort(400, message=message)

        # Audit trail: record which fields were touched in this edit.
        self.record_event({
            'action': 'edit',
            'object_id': user.id,
            'object_type': 'user',
            'updated_fields': params.keys()
        })

        return user.to_dict(with_api_key=is_admin_or_owner(user_id))
Example #57
0
 def project(self, keys):
     """Project each value down to *keys*, keeping only values that
     actually contain at least one of the requested keys."""
     wanted = set(keys)
     projected = self.transform(lambda value: fn.project(value, keys))
     return projected.filter(lambda value: value.keys() & wanted)
Example #58
0
    def _eval_stl(x, t=0):
        """Evaluate the closed-over formula ``phi`` on trace ``x`` at time
        ``t`` and return the truth value as a bool."""
        env = stl.utils.eval_lineqs(phi, x)
        # Merge in the atomic-proposition signals projected out of the trace.
        env.update(fn.project(x, ap_names))
        return bool(eval_stl(phi, dt)(env)[t])