class PowResult(ModelMixin, mg.Document):
    """A single proof-of-work solution submitted by a miner.

    Stores the PoW solution itself (header, seed, boundary, nonce, mix
    digest, hash) plus bookkeeping: the block it was mined for, the fee
    awarded for it, finish/verify timestamps and the submitting miner
    wallet / worker name.
    """

    # "strict": False lets documents carry extra fields not declared here.
    meta = {"collection": "zil_pow_results", "strict": False}

    header = mg.StringField(max_length=128, required=True)
    seed = mg.StringField(max_length=128, required=True)
    boundary = mg.StringField(max_length=128, required=True)
    pub_key = mg.StringField(max_length=128, required=True)
    mix_digest = mg.StringField(max_length=128, required=True)
    nonce = mg.StringField(max_length=128, required=True)
    hash_result = mg.StringField(max_length=128, required=True)
    block_num = mg.IntField(default=0)
    pow_fee = mg.FloatField(default=0.0)
    finished_date = mg.DateField()
    finished_time = mg.DateTimeField()
    verified_time = mg.DateTimeField()
    verified = mg.BooleanField(default=False)
    miner_wallet = mg.StringField(max_length=128)
    worker_name = mg.StringField(max_length=64, default="")

    def __str__(self):
        return f"[PowResult: {self.pub_key}, {self.header}]"

    @classmethod
    def avg_pow_fee(cls, block_num):
        # Average pow_fee over every result recorded for one block.
        # NOTE(review): `query` comes from ModelMixin (not visible here) —
        # presumably a filtered queryset helper; confirm in the mixin.
        return cls.query(block_num=block_num).average("pow_fee")

    @classmethod
    def get_pow_result(cls, header, boundary, pub_key=None, order="-finished_time"):
        """Return one result matching (header, boundary), optionally also
        filtered by pub_key.

        The default ordering (-finished_time) makes .first() return the
        most recently finished match; pass `order` to change that.
        Returns None when nothing matches.
        """
        query = Q(header=header) & Q(boundary=boundary)
        if pub_key is not None:
            query = query & Q(pub_key=pub_key)
        cursor = cls.objects(query).order_by(order)    # default to get latest one
        return cursor.first()

    @classmethod
    def epoch_rewards(cls, block_num=None, miner_wallet=None, worker_name=None):
        """Aggregate reward totals over an optional block range / miner / worker.

        block_num may be a single int (exact match) or a (start, end) pair
        (inclusive range). With all filters None the aggregation runs over
        the whole collection ($match: {} matches everything).

        Returns a dict with keys: rewards, count, verified, first_work_at,
        last_work_at. When no document matches, rewards/first/last are None
        and the counters are 0.
        """
        match = {}
        if block_num is not None:
            if isinstance(block_num, int):
                match = {
                    "block_num": {
                        "$eq": block_num,
                    }
                }
            else:
                # Anything non-int is treated as an inclusive (start, end) range.
                start, end = block_num
                match = {
                    "block_num": {
                        "$gte": start,
                        "$lte": end,
                    }
                }
        if miner_wallet is not None:
            match.update({
                "miner_wallet": {
                    "$eq": miner_wallet,
                }
            })
        if worker_name is not None:
            match.update({
                "worker_name": {
                    "$eq": worker_name,
                }
            })

        # Single-group aggregation: $cond maps verified True/False -> 1/0 so
        # $sum counts only verified results.
        group = {
            "_id": None,
            "rewards": {"$sum": "$pow_fee"},
            "count": {"$sum": 1},
            "verified": {"$sum": {"$cond": ["$verified", 1, 0]}},
            "first_work_at": {"$min": "$finished_time"},
            "last_work_at": {"$max": "$finished_time"}
        }

        pipeline = [
            {"$match": match},
            {"$group": group},
        ]
        res = list(cls.objects.aggregate(*pipeline))
        if res:
            rewards = res[0]
            # Drop the synthetic grouping key before handing back the stats.
            rewards.pop("_id", None)
            return rewards
        return {"rewards": None, "count": 0, "verified": 0,
                "first_work_at": None, "last_work_at": None}

    @classmethod
    def rewards_by_miners(cls, block_num):
        """Per-miner reward summary for one block.

        When block_num is None the latest known block number is used.
        Returns a list of dicts, one per miner wallet, each with keys:
        miner_wallet, block_num, date, date_time, rewards, finished, verified.
        """
        if block_num is None:
            block_num = PowWork.get_latest_block_num()
        match = {
            "block_num": {
                "$eq": block_num,
            }
        }
        # Group by wallet; $first picks the block/date fields from an
        # arbitrary (first-seen) document in each group.
        group = {
            "_id": "$miner_wallet",
            "block_num": {"$first": "$block_num"},
            "date": {"$first": "$finished_date"},
            "date_time": {"$first": "$finished_time"},
            "rewards": {"$sum": "$pow_fee"},
            "finished": {"$sum": 1},
            "verified": {"$sum": {"$cond": ["$verified", 1, 0]}},
        }
        # Rename _id back to miner_wallet in the output documents.
        project = {
            "_id": 0,
            "miner_wallet": "$_id",
            "block_num": 1,
            "date": 1,
            "date_time": 1,
            "rewards": 1,
            "finished": 1,
            "verified": 1,
        }
        pipeline = [
            {"$match": match},
            {"$group": group},
            {"$project": project}
        ]
        return list(cls.objects.aggregate(*pipeline))

    def get_worker(self):
        # Resolve (or lazily create) the Worker record for this result.
        return miner.Worker.get_or_create(self.miner_wallet, self.worker_name)
class SuperEnalottoEntry(mongoengine.Document):
    """A user's SuperEnalotto lottery entry (one set of picked numbers).

    Fields:
        user_id: id of the user who submitted the entry.
        date_valid: timestamp the entry was created for, in the configured
            timezone; defaults to "now", evaluated per document.
        numbers: the numbers picked for this entry.
    """
    user_id = mongoengine.IntField(required=True)
    # BUG FIX: the original passed datetime.datetime.now(config.TZ_ZONEINFO)
    # directly, which is evaluated ONCE at import time — every entry then
    # silently shared the same creation timestamp. mongoengine accepts a
    # callable default, re-evaluated each time a document is created.
    date_valid = mongoengine.DateTimeField(
        default=lambda: datetime.datetime.now(config.TZ_ZONEINFO))
    numbers = mongoengine.ListField(mongoengine.IntField(), required=True)
class Author(mongo.Document):
    """Basic author record: name, required contact email, and birthdate."""
    first_name = mongo.StringField(required=True)
    last_name = mongo.StringField()
    email = mongo.EmailField(required=True)
    birthdate = mongo.DateTimeField()
class User(me.Document):
    """A site user: identity, Facebook linkage, social graph, course history
    and assorted engagement/bookkeeping state.

    NOTE(review): this class is Python 2 code (`itertools.izip`, str-based
    md5, `base64.b64encode` on str) — confirm the runtime before porting.
    """

    # The fields needed to display a user's name and profile picture.
    CORE_FIELDS = ['first_name', 'last_name', 'fbid', 'email']

    class JoinSource(object):
        # How the account was created.
        FACEBOOK = 1
        EMAIL = 2

    class UserCreationError(Exception):
        # Raised by create_new_user_from_email on validation failures.
        pass

    meta = {
        'indexes': [
            'fb_access_token',
            'fbid',
            # TODO(mack): need to create the 'api_key' index on prod
            'api_key',
            # Allow users with email=None, but non-None emails must be unique
            {
                'fields': ['email'],
                'unique': True,
                'sparse': True,
            },
            'referrer_id',
        ],
    }

    # Randomly generated ID used to access some subset of user's information
    # without going through any ACL. Used for e.g. sharing schedules with non
    # flow users.
    #
    # e.g. A8RLLZTMX
    secret_id = me.StringField()

    # TODO(mack): join_date should be encapsulate in _id, but store it
    # for now, just in case; can remove it when sure that info is in _id
    join_date = me.DateTimeField(required=True)
    join_source = me.IntField(required=True,
                              choices=[JoinSource.FACEBOOK, JoinSource.EMAIL])

    referrer_id = me.ObjectIdField(required=False)

    # eg. Mack
    first_name = me.StringField(required=True)

    middle_name = me.StringField()

    # eg. Duan
    last_name = me.StringField(required=True)

    # TODO(mack): check if facebook always returns gender field
    gender = me.StringField(choices=['male', 'female'])

    # eg. 1647810326
    fbid = me.StringField()

    # http://stackoverflow.com/questions/4408945/what-is-the-length-of-the-access-token-in-facebook-oauth2
    fb_access_token = me.StringField(max_length=255)
    fb_access_token_expiry_date = me.DateTimeField()
    # The token expired due to de-auth, logging out, etc (ie. not time expired)
    fb_access_token_invalid = me.BooleanField(default=False)

    email = me.EmailField()
    password = me.StringField()

    # eg. list of user objectids, could be friends from sources besides
    # facebook
    friend_ids = me.ListField(me.ObjectIdField())
    # eg. list of fbids of friends from facebook, not necessarily all of whom
    # use the site
    friend_fbids = me.ListField(me.StringField())

    birth_date = me.DateTimeField()

    last_visited = me.DateTimeField()
    # TODO(mack): consider using SequenceField()
    num_visits = me.IntField(min_value=0, default=0)

    # The last time the user visited the onboarding page
    last_show_onboarding = me.DateTimeField()
    # The last time the user was shown the import schedule view
    last_show_import_schedule = me.DateTimeField()

    # eg. mduan or 20345619 ?
    student_id = me.StringField()

    # eg. university_of_waterloo ?
    school_id = me.StringField()

    # eg. software_engineering ?
    # TODO(mack): should store program_id, not program_name
    # program_id = me.StringField()
    program_name = me.StringField()

    # List of UserCourse.id's
    course_history = me.ListField(me.ObjectIdField())

    # TODO(mack): figure out why last_term_id was commented out in
    # a prior diff: #260f174
    # Deprecated
    last_term_id = me.StringField()
    # Deprecated
    last_program_year_id = me.StringField()

    # Track the number of times the user has invited friends
    # (So we can award points if they have)
    num_invites = me.IntField(min_value=0, default=0)

    # The number of points this user has. Point are awarded for a number of
    # actions such as reviewing courses, inviting friends. This is a cached
    # point total. It will be calculated once a day with aggregator.py
    num_points = me.IntField(min_value=0, default=0)

    is_admin = me.BooleanField(default=False)

    # TODO(mack): refactor this into something maintainable
    sent_exam_schedule_notifier_email = me.BooleanField(default=False)
    sent_velocity_demo_notifier_email = me.BooleanField(default=False)
    sent_raffle_notifier_email = me.BooleanField(default=False)
    sent_raffle_end_notifier_email = me.BooleanField(default=False)
    sent_schedule_sharing_notifier_email = me.BooleanField(default=False)
    sent_course_enrollment_feb_8_email = me.BooleanField(default=False)
    sent_referral_contest_email = me.BooleanField(default=False)
    sent_referral_contest_end_email = me.BooleanField(default=False)
    sent_welcome_email = me.BooleanField(default=False)

    email_unsubscribed = me.BooleanField(default=False)

    # Note: Backfilled on night of Nov. 29th, 2012
    transcripts_imported = me.IntField(min_value=0, default=0)
    schedules_imported = me.IntField(min_value=0, default=0)

    last_bad_schedule_paste = me.StringField()
    last_good_schedule_paste = me.StringField()
    last_bad_schedule_paste_date = me.DateTimeField()
    last_good_schedule_paste_date = me.DateTimeField()

    # Whether this user imported a schedule when it was still broken and we
    # should email them to apologize
    schedule_sorry = me.BooleanField(default=False)

    # API key that grants user to login_required APIs
    api_key = me.StringField()

    last_prompted_for_review = me.DateTimeField(default=datetime.datetime.min)

    voted_course_review_ids = me.ListField(me.StringField())
    voted_prof_review_ids = me.ListField(me.StringField())

    # Scholarships where a user has clicked: "Remove from profile"
    closed_scholarship_ids = me.ListField(me.StringField())

    @property
    def name(self):
        # Full display name; middle_name is intentionally not included.
        return '%s %s' % (self.first_name, self.last_name)

    def save(self, *args, **kwargs):
        """Save the user; on first save, sync the friend graph both ways.

        First save resolves friend_fbids -> friend_ids, then (after the
        document exists) pushes this user's id into each friend's
        friend_ids via a bulk update.
        """
        # TODO(mack): If _changed_fields attribute does not exist, it mean
        # document has been saved yet. Just need to verify. In this case,
        # we could just check if id has been set
        first_save = not hasattr(self, '_changed_fields')

        if first_save:
            # TODO(Sandy): We're assuming people won't unfriend anyone.
            # Fix this later?
            # TODO(mack): this isn't safe against race condition of both
            # friends signing up at same time
            #print 'friend_fbids', self.friend_fbids
            friends = (User.objects(fbid__in=self.friend_fbids).only(
                'id', 'friend_ids'))
            self.friend_ids = [f.id for f in friends]

        super(User, self).save(*args, **kwargs)

        if first_save:
            # TODO(mack): should do this asynchronously
            # Using update rather than save because it should be more efficient
            friends.update(add_to_set__friend_ids=self.id)

    # TODO(mack): think of better way to cache value
    @property
    def course_ids(self):
        # Lazily computed + memoized on the instance (_course_ids).
        if not hasattr(self, '_course_ids'):
            user_courses = _user_course.UserCourse.objects(
                id__in=self.course_history).only('course_id')
            self._course_ids = [uc.course_id for uc in user_courses]
        return self._course_ids

    @property
    def profile_pic_urls(self):
        # Facebook users get FB graph pictures; everyone else gets Gravatar.
        if self.fbid is not None:
            urls = self._get_fb_pic_urls()
        else:
            urls = self._get_gravatar_pic_urls()
        return urls

    def _get_fb_pic_urls(self):
        base_pic = "https://graph.facebook.com/%s/picture" % (self.fbid)
        return {
            'default': base_pic,
            'large': '%s?type=large' % (base_pic),
            'square': '%s?type=square' % (base_pic),
        }

    def _get_gravatar_pic_urls(self):
        # Gravatar API: https://en.gravatar.com/site/implement/images/
        # TODO(sandy): Serve our own default image instead of the mystery man
        # NOTE(review): md5 of a str — Python 2 only; Python 3 requires bytes.
        email_hash = hashlib.md5(self.email.strip().lower()).hexdigest()
        base_pic = "https://secure.gravatar.com/avatar/%s?d=mm" % (email_hash)
        return {
            'default': "%s&size=%s" % (base_pic, "50"),
            'large': "%s&size=%s" % (base_pic, "190"),
            'square': "%s&size=%s" % (base_pic, "50"),
        }

    @property
    def profile_url(self):
        return '/profile/%s' % self.id

    @property
    def absolute_profile_url(self):
        return '%s%s?admin=1' % (settings.RMC_HOST, self.profile_url)

    @property
    def short_program_name(self):
        # e.g. "Software Engineering, ..." -> "Software Engineering".
        if self.program_name:
            return self.program_name.split(',')[0]
        return ''

    @property
    def has_course_history(self):
        # True iff the user has at least one non-shortlist course.
        # TODO(Sandy): Using this to backfill transcripts imported,
        # remove later
        if len(self.course_history) == 0:
            return False
        for uc in self.get_user_courses():
            if not _term.Term.is_shortlist_term(uc.term_id):
                return True
        return False

    @property
    def has_shortlisted(self):
        for uc in self.get_user_courses():
            if _term.Term.is_shortlist_term(uc.term_id):
                return True
        return False

    @property
    def has_schedule(self):
        # TODO(Sandy): Actually this only works assuming users never remove
        # their schedule and we'll have to do actual queries when 2013_05 comes
        return self.schedules_imported > 0

    @property
    def should_renew_fb_token(self):
        # Should renew FB token if it expired or will expire "soon".
        future_date = datetime.datetime.now() + datetime.timedelta(
            days=facebook.FB_FORCE_TOKEN_EXPIRATION_DAYS)
        return (self.fb_access_token_expiry_date is None
                or self.fb_access_token_expiry_date < future_date
                or self.fb_access_token_invalid)

    @property
    def is_fb_token_expired(self):
        return (self.fb_access_token_expiry_date is None
                or self.fb_access_token_expiry_date < datetime.datetime.now()
                or self.fb_access_token_invalid)

    @property
    def is_demo_account(self):
        return self.fbid == settings.DEMO_ACCOUNT_FBID

    @property
    def last_schedule_paste(self):
        # Prefer the last paste that parsed successfully.
        return self.last_good_schedule_paste or self.last_bad_schedule_paste

    def get_user_courses(self):
        return _user_course.UserCourse.objects(id__in=self.course_history)

    @classmethod
    def cls_mutual_courses_redis_key(cls, user_id_one, user_id_two):
        # Key is order-independent: the smaller id always comes first.
        if user_id_one < user_id_two:
            first_id = user_id_one
            second_id = user_id_two
        else:
            first_id = user_id_two
            second_id = user_id_one
        return 'mutual_courses:%s:%s' % (first_id, second_id)

    def mutual_courses_redis_key(self, other_user_id):
        return User.cls_mutual_courses_redis_key(self.id, other_user_id)

    def get_mutual_course_ids(self, redis):
        """Return {friend_id: set of course ids shared with that friend},
        read from the redis cache via a single pipeline round-trip."""
        # fetch mutual friends from redis
        pipe = redis.pipeline()
        # Show mutual courses between the viewing user and the friends of the
        # profile user
        for friend_id in self.friend_ids:
            pipe.smembers(self.mutual_courses_redis_key(friend_id))
        mutual_course_ids_per_user = pipe.execute()

        # NOTE(review): itertools.izip is Python 2 only.
        zipped = itertools.izip(self.friend_ids, mutual_course_ids_per_user)
        mutual_course_ids_by_friend = {}
        for friend_id, mutual_course_ids in zipped:
            mutual_course_ids_by_friend[friend_id] = mutual_course_ids
        return mutual_course_ids_by_friend

    def cache_mutual_course_ids(self, redis):
        """Populate the redis sets of course ids shared with each friend."""
        friends = User.objects(id__in=self.friend_ids).only('course_history')
        friend_map = {}
        for friend in friends:
            friend_map[friend.id] = friend

        my_course_ids = set(self.course_ids)
        for friend in friends:
            mutual_course_ids = my_course_ids.intersection(friend.course_ids)
            if mutual_course_ids:
                redis_key = self.mutual_courses_redis_key(friend.id)
                redis.sadd(redis_key, *list(mutual_course_ids))

    def remove_mutual_course_ids(self, redis):
        # Drop every cached mutual-course set involving this user.
        pipe = redis.pipeline()
        for friend_id in self.friend_ids:
            pipe.delete(self.mutual_courses_redis_key(friend_id))
        return pipe.execute()

    def get_latest_program_year_id(self):
        """program_year_id of the most recent already-taken course, or None."""
        latest_term_uc = None
        for uc_dict in self.get_user_courses():
            # Ignore untaken courses or shortlisted courses
            if uc_dict['term_id'] > util.get_current_term_id():
                continue

            if not latest_term_uc:
                latest_term_uc = uc_dict
            elif uc_dict['term_id'] > latest_term_uc['term_id']:
                latest_term_uc = uc_dict

        if latest_term_uc:
            return latest_term_uc['program_year_id']
        return None

    def get_friends(self):
        """Gets basic info for each of this user's friends."""
        return User.objects(id__in=self.friend_ids).only(
            *(User.CORE_FIELDS +
              ['id', 'num_points', 'num_invites', 'program_name']))

    def rated_review(self, review_id, review_type):
        # review_type is 'course' or (anything else ->) professor review.
        if review_type == 'course':
            return review_id in self.voted_course_review_ids
        else:
            return review_id in self.voted_prof_review_ids

    def to_dict(self, extended=True, include_course_ids=False):
        """Serialize for the API; `extended` adds the social/course lists."""
        user_dict = {
            'id': self.id,
            'fbid': self.fbid,
            'first_name': self.first_name,
            'last_name': self.last_name,
            'name': self.name,
            'profile_pic_urls': self.profile_pic_urls,
            'program_name': self.short_program_name,
            'num_invites': self.num_invites,
            'num_points': self.num_points,
        }
        if extended:
            user_dict.update({
                'friend_ids': self.friend_ids,
                'course_history': self.course_history,
            })
        if include_course_ids:
            user_dict['course_ids'] = self.course_ids

        return user_dict

    # TODO(mack): make race condition safe?
    def delete(self, *args, **kwargs):
        """Delete the user and cascade: unlink from friends, remove their
        UserCourse and UserScheduleItem documents."""
        # Remove this user from the friend lists of all friends
        friends = User.objects(id__in=self.friend_ids)
        friends.update(pull__friend_ids=self.id)

        # Delete all their user course objects
        _user_course.UserCourse.objects(user_id=self.id).delete()

        # Delete all their UserScheduleItem objects
        _user_schedule_item.UserScheduleItem.objects(user_id=self.id).delete()

        # TODO(mack): delete mutual course information from redis?
        # should be fine for now since we are removing this user from their
        # friends' friend_ids, and redis cache will be regenerated daily
        # from aggregator.py

        return super(User, self).delete(*args, **kwargs)

    def to_review_author_dict(self, current_user, reveal_identity):
        # Anonymous reviews expose only the program name.
        is_current_user = current_user and current_user.id == self.id
        if reveal_identity:
            return {
                'id': self.id,
                'name': 'You' if is_current_user else self.name,
                'profile_pic_url': self.profile_pic_urls['square'],
            }
        else:
            return {'program_name': self.short_program_name}

    def invite_friend(self, redis):
        # First invite is worth points; later ones just increment the count.
        self.num_invites += 1
        if self.num_invites == 1:
            self.award_points(_points.PointSource.FIRST_INVITE, redis)

    def award_points(self, points, redis):
        # Also bumps the site-wide total in redis. Caller must save().
        self.num_points += points
        redis.incr('total_points', points)

    def update_fb_friends(self, fbids):
        # Replace the FB friend list and recompute friend_ids from it.
        self.friend_fbids = fbids
        fb_friends = (User.objects(fbid__in=self.friend_fbids).only(
            'id', 'friend_ids'))
        # We have friends from only Facebook right now, so just set it
        self.friend_ids = [f.id for f in fb_friends]

    def get_schedule_item_dicts(self, exam_objs=None):
        """Gets all schedule items for this user starting no later than
        a year ago.

        Args:
            exam_objs: Optional exam objects to convert to UserScheduleItem
                and add to return list.
        Returns:
            a list of UserScheduleItem models as dicts.
        """
        one_year_ago = datetime.datetime.now() - datetime.timedelta(days=365)
        schedule_item_objs = _user_schedule_item.UserScheduleItem.objects(
            user_id=self.id, start_date__gte=one_year_ago)
        dicts = [si.to_dict() for si in schedule_item_objs]
        if exam_objs:
            dicts.extend(e.to_schedule_obj().to_dict() for e in exam_objs)
        return dicts

    def get_failed_schedule_item_dicts(self):
        # Schedule pastes that failed to parse within the last year.
        one_year_ago = datetime.datetime.now() - datetime.timedelta(days=365)
        schedule_item_objs = _user_schedule_item.FailedScheduleItem.objects(
            user_id=self.id, parsed_date__gte=one_year_ago)
        return [si.to_dict() for si in schedule_item_objs]

    def get_all_schedule_items(self):
        return _user_schedule_item.UserScheduleItem.objects(user_id=self.id)

    def get_current_term_exams(self, current_term_course_ids=None):
        # Course ids may be supplied to skip the UserCourse lookup.
        if not current_term_course_ids:
            ucs = (self.get_user_courses().filter(
                term_id=util.get_current_term_id()).only('course_id'))
            current_term_course_ids = [uc.course_id for uc in ucs]
        return _exam.Exam.objects(course_id__in=current_term_course_ids)

    def get_secret_id(self):
        # Lazily generate and persist the shareable secret id.
        # TODO(jlfwong): This is possibly a race condition...
        if self.secret_id is None:
            self.secret_id = util.generate_secret_id()
            self.save()
        return self.secret_id

    def add_course(self, course_id, term_id, program_year_id=None):
        """Creates a UserCourse and adds it to the user's course_history.

        Idempotent. Returns the resulting UserCourse, or None if course_id
        is unknown to our course data.
        """
        user_course = _user_course.UserCourse.objects(
            user_id=self.id, course_id=course_id).first()

        if user_course is None:
            if _course.Course.objects.with_id(course_id) is None:
                # Non-existant course according to our data
                rmclogger.log_event(rmclogger.LOG_CATEGORY_DATA_MODEL,
                                    rmclogger.LOG_EVENT_UNKNOWN_COURSE_ID,
                                    course_id)
                return None

            user_course = _user_course.UserCourse(
                user_id=self.id,
                course_id=course_id,
                term_id=term_id,
                program_year_id=program_year_id,
            )
        else:
            # Record only the latest attempt for duplicate/failed courses
            if (term_id > user_course.term_id
                    or user_course.term_id == _term.Term.SHORTLIST_TERM_ID):
                user_course.term_id = term_id
                user_course.program_year_id = program_year_id

        user_course.save()

        if user_course.id not in self.course_history:
            self.course_history.append(user_course.id)
            self.save()

        return user_course

    # Generate a random api key granting this user to access '/api/' routes
    def grant_api_key(self):
        uuid_ = uuid.uuid4()
        md5 = hashlib.md5()
        md5.update(str(uuid_))
        microsecs = int(time.time() * 1000000)
        raw_api_key = str(microsecs) + md5.hexdigest()
        # NOTE(review): b64encode on a str — Python 2 only.
        self.api_key = base64.b64encode(raw_api_key)
        self.save()
        return self.api_key

    def next_course_to_review(self):
        user_courses = _user_course.UserCourse.objects(user_id=self.id)
        return _user_course.UserCourse.select_course_to_review(user_courses)

    def should_prompt_review(self):
        # Prompt only after enough days since both joining and the last prompt.
        now = datetime.datetime.now()
        elapsed = min(now - self.last_prompted_for_review, now - self.join_date)
        return elapsed.days > PROMPT_TO_REVIEW_DELAY_DAYS

    @staticmethod
    def auth_user(email, password):
        """Returns the authenticated user or None."""
        user = User.objects(email=email)
        if not user:
            return None

        # TODO(sandy): Since we added a unique index on email, this shouldn't
        # happen anymore. But keep this around for a bit, in case something
        # messes up [Apr 8, 2014]
        if user.count() > 1:
            logging.error('Multiple email addressed matched: %s' % email)
            return None

        user = user.first()
        # TODO(sandy): Provide more helpful errors for users signed up with fb
        if (not user.password
                or not bcrypt.check_password_hash(user.password, password)):
            return None

        return user

    @staticmethod
    def create_new_user_from_email(first_name, last_name, email, password):
        """Create and save a password-based user; raises UserCreationError
        on short password, invalid email, or duplicate email."""
        # NOTE(review): the message hard-codes "8" while the check uses
        # PASSWORD_MIN_LENGTH — keep these in sync.
        if len(password) < PASSWORD_MIN_LENGTH:
            raise User.UserCreationError(
                'Passwords must be at least 8 characters long.')

        user = User(
            email=email,
            first_name=first_name,
            join_date=datetime.datetime.now(),
            join_source=User.JoinSource.EMAIL,
            last_name=last_name,
            password=bcrypt.generate_password_hash(password,
                                                   rounds=BCRYPT_ROUNDS),
        )
        try:
            user.save()
        except me.base.ValidationError as e:
            if 'email' in e.errors:
                raise User.UserCreationError('Oops, that email is invalid.')
            raise
        except me.queryset.NotUniqueError as e:
            raise User.UserCreationError(
                'That email is already signed up.'
                ' (Maybe you already signed up with Facebook?)')

        return user

    def __repr__(self):
        return "<User: %s>" % self.name.encode('utf-8')
class MStatistics(mongo.Document):
    """Key/value store for site-wide statistics, with optional expiry.

    Each document is one (key, value) pair; collect_statistics_* methods
    periodically recompute the values from redis counters and other
    collections. NOTE(review): this class is Python 2 code (print
    statements) — confirm the runtime before porting.
    """

    key = mongo.StringField(unique=True)
    value = mongo.DynamicField()
    expiration_date = mongo.DateTimeField()

    meta = {
        'collection': 'statistics',
        'allow_inheritance': False,
        'indexes': ['key'],
    }

    def __unicode__(self):
        return "%s: %s" % (self.key, self.value)

    @classmethod
    def get(cls, key, default=None):
        """Fetch a value by key; expired entries are deleted on read and
        `default` is returned in their place."""
        obj = cls.objects.filter(key=key).first()
        if not obj:
            return default
        if obj.expiration_date and obj.expiration_date < datetime.datetime.now():
            obj.delete()
            return default
        return obj.value

    @classmethod
    def set(cls, key, value, expiration_sec=None):
        """Upsert a value by key; expiration_sec (if given) sets a TTL
        enforced lazily by get()."""
        try:
            obj = cls.objects.get(key=key)
        except cls.DoesNotExist:
            obj = cls.objects.create(key=key)
        obj.value = value
        if expiration_sec:
            obj.expiration_date = datetime.datetime.now() + datetime.timedelta(
                seconds=expiration_sec)
        obj.save()

    @classmethod
    def all(cls):
        """Return every statistic as a dict, coercing known keys to their
        native types (json lists / int / float) and adding derived totals."""
        stats = cls.objects.all()
        values = dict([(stat.key, stat.value) for stat in stats])
        for key, value in values.items():
            if key in ('avg_time_taken', 'sites_loaded', 'stories_shared'):
                values[key] = json.decode(value)
            elif key in ('feeds_fetched', 'premium_users', 'standard_users',
                         'latest_sites_loaded', 'max_sites_loaded',
                         'max_stories_shared'):
                values[key] = int(value)
            elif key in ('latest_avg_time_taken', 'max_avg_time_taken',
                         'last_5_min_time_taken'):
                values[key] = float(value)

        values['total_sites_loaded'] = sum(
            values['sites_loaded']) if 'sites_loaded' in values else 0
        values['total_stories_shared'] = sum(
            values['stories_shared']) if 'stories_shared' in values else 0

        return values

    @classmethod
    def collect_statistics(cls):
        # Run every collector, logging cumulative elapsed time after each.
        now = datetime.datetime.now()
        cls.collect_statistics_premium_users()
        print "Premiums: %s" % (datetime.datetime.now() - now)
        cls.collect_statistics_standard_users()
        print "Standard users: %s" % (datetime.datetime.now() - now)
        cls.collect_statistics_sites_loaded()
        print "Sites loaded: %s" % (datetime.datetime.now() - now)
        cls.collect_statistics_stories_shared()
        print "Stories shared: %s" % (datetime.datetime.now() - now)
        cls.collect_statistics_for_db()
        print "DB Stats: %s" % (datetime.datetime.now() - now)
        cls.collect_statistics_feeds_fetched()
        print "Feeds Fetched: %s" % (datetime.datetime.now() - now)

    @classmethod
    def collect_statistics_feeds_fetched(cls):
        # Number of feed fetches in the last 24h, from redis stats.
        feeds_fetched = RStats.count('feed_fetch', hours=24)
        cls.objects(key='feeds_fetched').update_one(upsert=True,
                                                    set__key='feeds_fetched',
                                                    set__value=feeds_fetched)
        return feeds_fetched

    @classmethod
    def collect_statistics_premium_users(cls):
        # Premium users seen in the last 24h.
        last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
        premium_users = Profile.objects.filter(last_seen_on__gte=last_day,
                                               is_premium=True).count()
        cls.objects(key='premium_users').update_one(upsert=True,
                                                    set__key='premium_users',
                                                    set__value=premium_users)
        return premium_users

    @classmethod
    def collect_statistics_standard_users(cls):
        # Non-premium users seen in the last 24h.
        last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
        standard_users = Profile.objects.filter(last_seen_on__gte=last_day,
                                                is_premium=False).count()
        cls.objects(key='standard_users').update_one(upsert=True,
                                                     set__key='standard_users',
                                                     set__value=standard_users)
        return standard_users

    @classmethod
    def collect_statistics_sites_loaded(cls):
        """Build 24 hourly buckets of page-load counts and average load
        times from per-minute redis counters, then upsert the derived
        statistic keys."""
        now = round_time(datetime.datetime.now(), round_to=60)
        sites_loaded = []
        avg_time_taken = []
        last_5_min_time_taken = 0
        r = redis.Redis(connection_pool=settings.REDIS_STATISTICS_POOL)

        for hour in range(24):
            start_hours_ago = now - datetime.timedelta(hours=hour + 1)
            pipe = r.pipeline()
            for m in range(60):
                minute = start_hours_ago + datetime.timedelta(minutes=m)
                # Per-minute keys: ":s" holds the count, ":a" the summed time.
                key = "%s:%s" % (RStats.stats_type('page_load'),
                                 minute.strftime('%s'))
                pipe.get("%s:s" % key)
                pipe.get("%s:a" % key)

            times = pipe.execute()

            # Interleaved results: even slots are counts, odd slots are sums.
            counts = [int(c) for c in times[::2] if c]
            avgs = [float(a) for a in times[1::2] if a]

            if hour == 0:
                # NOTE(review): despite the name, this slices only the first
                # element ([:1]) of the most recent hour's buckets — looks
                # like it was meant to be [:5]; confirm before changing.
                last_5_min_time_taken = round(
                    sum(avgs[:1]) / max(1, sum(counts[:1])), 2)

            if counts and avgs:
                count = max(1, sum(counts))
                avg = round(sum(avgs) / count, 3)
            else:
                count = 0
                avg = 0

            sites_loaded.append(count)
            avg_time_taken.append(avg)

        # Buckets were built newest-first; flip to chronological order.
        sites_loaded.reverse()
        avg_time_taken.reverse()

        values = (
            ('sites_loaded', json.encode(sites_loaded)),
            ('avg_time_taken', json.encode(avg_time_taken)),
            ('latest_sites_loaded', sites_loaded[-1]),
            ('latest_avg_time_taken', avg_time_taken[-1]),
            ('max_sites_loaded', max(sites_loaded)),
            ('max_avg_time_taken', max(1, max(avg_time_taken))),
            ('last_5_min_time_taken', last_5_min_time_taken),
        )
        for key, value in values:
            cls.objects(key=key).update_one(upsert=True,
                                            set__key=key,
                                            set__value=value)

    @classmethod
    def collect_statistics_stories_shared(cls):
        # 24 hourly buckets of shared-story counts, oldest first.
        now = datetime.datetime.now()
        stories_shared = []

        for hour in range(24):
            start_hours_ago = now - datetime.timedelta(hours=hour)
            end_hours_ago = now - datetime.timedelta(hours=hour + 1)
            shares = MSharedStory.objects.filter(
                shared_date__lte=start_hours_ago,
                shared_date__gte=end_hours_ago).count()
            stories_shared.append(shares)

        stories_shared.reverse()

        values = (
            ('stories_shared', json.encode(stories_shared)),
            ('latest_stories_shared', stories_shared[-1]),
            ('max_stories_shared', max(stories_shared)),
        )
        for key, value in values:
            cls.objects(key=key).update_one(upsert=True,
                                            set__key=key,
                                            set__value=value)

    @classmethod
    def collect_statistics_for_db(cls):
        """Collect DB timing statistics: replication lag plus hourly average
        call times for each backend (sql/mongo/redis and their task_*
        variants) from per-minute redis counters."""
        lag = db_functions.mongo_max_replication_lag(settings.MONGODB)
        cls.set('mongodb_replication_lag', lag)

        now = round_time(datetime.datetime.now(), round_to=60)
        r = redis.Redis(connection_pool=settings.REDIS_STATISTICS_POOL)
        db_times = {}
        latest_db_times = {}
        for db in [
                'sql', 'mongo', 'redis', 'task_sql', 'task_mongo', 'task_redis'
        ]:
            db_times[db] = []
            for hour in range(24):
                start_hours_ago = now - datetime.timedelta(hours=hour + 1)
                pipe = r.pipeline()
                for m in range(60):
                    minute = start_hours_ago + datetime.timedelta(minutes=m)
                    # Per-minute keys: ":c" is the call count, ":t" the time.
                    key = "DB:%s:%s" % (db, minute.strftime('%s'))
                    pipe.get("%s:c" % key)
                    pipe.get("%s:t" % key)

                times = pipe.execute()

                counts = [int(c or 0) for c in times[::2]]
                avgs = [float(a or 0) for a in times[1::2]]
                if counts and avgs:
                    count = sum(counts)
                    avg = round(sum(avgs) / count, 3) if count else 0
                else:
                    count = 0
                    avg = 0

                if hour == 0:
                    # Most recent minute's per-call average for the "latest_*"
                    # keys.
                    latest_count = float(counts[-1]) if len(counts) else 0
                    latest_avg = float(avgs[-1]) if len(avgs) else 0
                    latest_db_times[
                        db] = latest_avg / latest_count if latest_count else 0

                db_times[db].append(avg)

            # Buckets were built newest-first; flip to chronological order.
            db_times[db].reverse()

        values = (
            ('avg_sql_times', json.encode(db_times['sql'])),
            ('avg_mongo_times', json.encode(db_times['mongo'])),
            ('avg_redis_times', json.encode(db_times['redis'])),
            ('latest_sql_avg', latest_db_times['sql']),
            ('latest_mongo_avg', latest_db_times['mongo']),
            ('latest_redis_avg', latest_db_times['redis']),
            ('latest_task_sql_avg', latest_db_times['task_sql']),
            ('latest_task_mongo_avg', latest_db_times['task_mongo']),
            ('latest_task_redis_avg', latest_db_times['task_redis']),
        )
        for key, value in values:
            cls.objects(key=key).update_one(upsert=True,
                                            set__key=key,
                                            set__value=value)
class promotions(mongoengine.Document):
    """Promotions Collection from the MongoDB instance"""
    # NOTE(review): class name breaks PascalCase, but renaming it would
    # change the collection name mongoengine derives from it (and break
    # callers), so it is left as-is.
    insert_date = mongoengine.DateTimeField()
    game = mongoengine.StringField()
    price = mongoengine.DecimalField()
    currency = mongoengine.StringField()
class TokenDB(stormbase.StormFoundationDB):
    """Persisted auth token: a unique token string owned by a user, with a
    required expiry timestamp and optional free-form metadata."""
    user = me.StringField(required=True)
    token = me.StringField(required=True, unique=True)
    expiry = me.DateTimeField(required=True)
    metadata = me.DictField(
        required=False,
        help_text='Arbitrary metadata associated with this token')
class Project(mongoengine.Document):
    """
    A project is the highest controlling structure of an analysis and houses
    all the experiments, their associated FileGroups and the populations
    contained in each FileGroup and the populations clusters.

    Project can be used to create new experiments and to load existing
    experiments to interact with.

    Single cell data is stored in HDF5 files and the meta-data stored in
    MongoDB. When creating a Project you should specify where to store these
    HDF5 files. This data is stored locally and the local path is stored in
    'data_directory'. This will be checked each time the object is initiated
    but can be changed using the 'update_data_directory' method.

    Attributes
    ----------
    project_id: str, required
        unique identifier for project
    subjects: list
        List of references for associated subjects; see Subject
    start_date: DateTime
        date of creation
    owner: str, required
        user name of owner
    experiments: list
        List of references for associated fcs files
    data_directory: str, required
        Path to the local directory for storing HDF5 files.
    """
    project_id = mongoengine.StringField(required=True, unique=True)
    # reverse_delete_rule=4 is mongoengine PULL: deleting a referenced
    # document removes it from this list.
    subjects = mongoengine.ListField(
        mongoengine.ReferenceField(Subject, reverse_delete_rule=4))
    start_date = mongoengine.DateTimeField(default=datetime.datetime.now)
    # BUG FIX: keyword was misspelled 'requred', so the "required" constraint
    # documented above was never enforced by mongoengine.
    owner = mongoengine.StringField(required=True)
    experiments = mongoengine.ListField(
        mongoengine.ReferenceField(Experiment, reverse_delete_rule=4))
    data_directory = mongoengine.StringField(required=True)
    meta = {
        'db_alias': 'core',
        'collection': 'projects'
    }

    def __init__(self, *args, **values):
        super().__init__(*args, **values)
        # Warn (don't fail) if the HDF5 directory is missing: the document
        # itself remains loadable, but data access will break.
        if not os.path.isdir(self.data_directory):
            warn(f"Could not locate data directory at path {self.data_directory}, all further operations "
                 f"will likely resolve in errors as single cell data will not be attainable. Update the "
                 f"data directory before continuing using the 'update_data_directory' method.",
                 stacklevel=2)

    def update_data_directory(self, data_directory: str, move: bool = True):
        """
        Update the data directory for this Project. It is recommended that
        you let cytopy migrate the existing directory by letting 'move'
        equal True.

        Parameters
        ----------
        data_directory: str
            Local path to HDF5 data
        move: bool (default=True)
            If True, will attempt to move the existing data_directory

        Returns
        -------
        None

        Raises
        ------
        InvalidDataDirectory
            If provided path does not exist
        """
        if not os.path.isdir(data_directory):
            raise InvalidDataDirectory(f"Could not find directory at path {data_directory}")
        # Point every experiment and its files at the new location first, so
        # documents are consistent before files are physically moved.
        for e in self.experiments:
            for f in e.fcs_files:
                f.data_directory = data_directory
                f.save()
            e.data_directory = data_directory
            e.save()
        if move:
            for f in os.listdir(self.data_directory):
                shutil.move(os.path.join(self.data_directory, f), data_directory)
            shutil.rmtree(self.data_directory)
        self.data_directory = data_directory
        self.save()

    def get_experiment(self, experiment_id: str) -> Experiment:
        """
        Load the experiment object for a given experiment ID

        Parameters
        ----------
        experiment_id: str
            experiment to load

        Returns
        --------
        Experiment

        Raises
        -------
        MissingExperimentError
            If requested experiment does not exist in this project
        """
        try:
            return [e for e in self.experiments if e.experiment_id == experiment_id][0]
        except IndexError:
            raise MissingExperimentError(f"Invalid experiment; {experiment_id} does not exist")

    def add_experiment(self, experiment_id: str, panel_definition: str or dict) -> Experiment:
        """
        Add new experiment to project. Note you must provide either a path
        to an excel template for the panel definition (panel_definition) or
        the name of an existing panel (panel_name). If panel_definition is
        provided, then the panel_name will be used to name the new Panel
        document associated to this experiment. If no panel_name is provided,
        then the panel name will default to "{experiment_id}_panel".

        Parameters
        -----------
        experiment_id: str
            experiment name
        panel_definition: str or dict
            Path to excel template for generating the panel

        Returns
        --------
        Experiment
            Newly created FCSExperiment

        Raises
        -------
        DuplicateExperimentError
            If given experiment ID already exists
        """
        if experiment_id in [x.experiment_id for x in self.experiments]:
            raise DuplicateExperimentError(f"Experiment with id {experiment_id} already exists!")
        exp = Experiment(experiment_id=experiment_id, data_directory=self.data_directory)
        exp.generate_panel(panel_definition=panel_definition)
        exp.save()
        self.experiments.append(exp)
        self.save()
        return exp

    def add_subject(self, subject_id: str, **kwargs) -> Subject:
        """
        Create a new subject and associated to project; a subject is an
        individual element of a study e.g. a patient or a mouse

        Parameters
        -----------
        subject_id: str
            subject ID for the new subject
        kwargs:
            Additional keyword arguments to pass to Subject initialisation
            (see cytopy.data.subject.Subject)

        Returns
        --------
        Subject
            The newly created Subject document

        Raises
        -------
        DuplicateSubjectError
            If subject already exists
        """
        if subject_id in [x.subject_id for x in self.subjects]:
            raise DuplicateSubjectError(f"Subject with ID {subject_id} already exists")
        new_subject = Subject(subject_id=subject_id, **kwargs)
        new_subject.save()
        self.subjects.append(new_subject)
        self.save()
        return new_subject

    def list_subjects(self) -> list:
        """
        Generate a list of subject ID for subjects associated to this project

        Returns
        --------
        List
            List of subject IDs
        """
        return [s.subject_id for s in self.subjects]

    def list_experiments(self) -> list:
        """
        Lists experiments in project

        Returns
        -------
        List
        """
        return [e.experiment_id for e in self.experiments]

    def get_subject(self, subject_id: str) -> Subject:
        """
        Given a subject ID associated to Project, return the Subject document

        Parameters
        -----------
        subject_id: str
            subject ID to pull

        Returns
        --------
        Subject

        Raises
        -------
        MissingSubjectError
            If desired subject does not exist
        """
        if subject_id not in self.list_subjects():
            raise MissingSubjectError(f"Invalid subject ID {subject_id}, does not exist")
        return Subject.objects(subject_id=subject_id).get()

    def delete_experiment(self, experiment_id: str):
        """
        Delete experiment

        Parameters
        ----------
        experiment_id: str

        Returns
        -------
        None

        Raises
        ------
        MissingExperimentError
            If requested experiment does not exist in this project
        """
        if experiment_id not in self.list_experiments():
            raise MissingExperimentError(f"No such experiment {experiment_id}")
        exp = self.get_experiment(experiment_id)
        # The PULL reverse_delete_rule on `experiments` removes the reference
        # from this document.
        exp.delete()

    def delete(self, delete_h5_data: bool = True, *args, **kwargs) -> None:
        """
        Delete project (wrapper function of mongoengine.Document.delete)

        Parameters
        -----------
        delete_h5_data: bool (default=True)
            Delete associated HDF5 data
        args:
            positional arguments to pass to parent call
            (see mongoengine.Document.delete)
        kwargs:
            keyword arguments to pass to parent call
            (see mongoengine.Document.delete)

        Returns
        --------
        None
        """
        # Cascade: remove subjects and experiments before the project itself.
        for p in self.subjects:
            p.delete()
        for e in self.experiments:
            e.delete()
        super().delete(*args, **kwargs)
        if delete_h5_data:
            shutil.rmtree(self.data_directory)
class PeriodicTaskInfo(me.Document):
    """Bookkeeping document for a periodic task: success/failure tracking,
    autodisable thresholds, and a lock preventing concurrent runs."""

    class Lock(me.EmbeddedDocument):
        id = me.StringField(default=lambda: uuid.uuid4().hex)
        # BUG FIX: default must be the callable itself, not its result.
        # `datetime.datetime.now()` was evaluated once at class-definition
        # time, so every Lock shared that stale timestamp — which also made
        # the `break_lock_after` expiry check in `acquire_lock` meaningless.
        created_at = me.DateTimeField(default=datetime.datetime.now)

    # Unique task identifier.
    key = me.StringField(primary_key=True)
    # Track successes/failures for autodisabling.
    last_success = me.DateTimeField()
    last_failure = me.DateTimeField()
    failures_count = me.IntField(default=0)
    # Lock to prevent concurrent running of the same task.
    lock = me.EmbeddedDocumentField(Lock)

    # Class attributes (NOT FIELDS). This define constants on the class.
    # Subclasses may override by setting attributes, dynamic properties or
    # fields.
    # Task won't be autodisabled if it has failed less times.
    min_failures_count = 50
    # Task won't be autodisabled if it has succeeded in this period.
    min_failures_period = datetime.timedelta(hours=1)
    # Task will be autodisabled if it has failed more times.
    max_failures_count = 100
    # Task will be autodisabled if it hasn't succeeded in this period.
    max_failures_period = datetime.timedelta(days=2)
    # Lock will be broken if it was last acquired more than this time ago.
    break_lock_after = datetime.timedelta(seconds=300)
    # Abort task if previous attempt was in less than this time before.
    min_interval = datetime.timedelta(seconds=5)

    @classmethod
    def get_or_add(cls, key):
        """Return the document for `key`, creating it if missing.

        Handles the create/create race by catching NotUniqueError and
        reloading the winner's document."""
        try:
            task = cls.objects.get(key=key)
        except cls.DoesNotExist:
            log.info("PeriodicTaskInfo for '%s' missing, will create.", key)
            task = cls(key=key)
            try:
                task.save()
            except me.NotUniqueError:
                # Work around race condition where document was created since
                # we checked.
                log.warning(
                    "PeriodicTaskInfo for '%s' creation race "
                    "condition, will reload.", key)
                task = cls.objects.get(key=key)
        log.debug("Loaded PeriodicTaskInfo for '%s'.", key)
        return task

    def get_last_run(self):
        """Return the datetime of the most recent run (success or failure),
        or None if the task has never run."""
        if self.last_success and self.last_failure:
            return max(self.last_success, self.last_failure)
        return self.last_success or self.last_failure

    def check_failures_threshold_exceeded(self):
        """Raise PeriodicTaskThresholdExceeed if task should be autodisabled"""
        now = datetime.datetime.now()
        # If it hasn't run recently, then allow it to run.
        last_run = self.get_last_run()
        if not last_run or now - last_run > datetime.timedelta(days=1):
            return
        # Not exceeded if it hasn't failed enough times.
        if self.min_failures_count is not None:
            if self.failures_count < self.min_failures_count:
                return
        # Not exceeded if it hasn't failed for long enough.
        if self.min_failures_period is not None:
            if now - self.last_failure < self.min_failures_period:
                return
        # Exceeded if it has failed too many times.
        if self.max_failures_count is not None:
            if self.failures_count > self.max_failures_count:
                raise PeriodicTaskThresholdExceeded()
        # Exceed if it has been failing for too long.
        if self.max_failures_period is not None:
            if now - self.last_failure > self.max_failures_period:
                raise PeriodicTaskThresholdExceeded()
        # None of the conditions matched, so threshold hasn't been exceeded.
        return

    def check_too_soon(self):
        """Raise error if task has been run too recently"""
        now = datetime.datetime.now()
        # Find last run. If too recent, abort.
        if self.min_interval:
            last_run = self.get_last_run()
            if last_run:
                if now - last_run < self.min_interval:
                    raise PeriodicTaskTooRecentLastRun()

    def acquire_lock(self, attempts=1, retry_sleep=1):
        """Acquire run lock

        Retries up to `attempts` times, sleeping `retry_sleep` seconds and
        reloading between tries. A lock older than `break_lock_after` is
        considered stale and ignored. Raises PeriodicTaskLockTakenError if
        the lock is still held after all attempts."""
        # Is another same task running?
        for i in range(attempts):
            if not self.lock:
                break
            if self.break_lock_after:
                now = datetime.datetime.now()
                if now - self.lock.created_at > self.break_lock_after:
                    # Has been running for too long or has died. Ignore.
                    log.error(
                        "Other task '%s' seems to have started, but "
                        "it's been quite a while, will ignore and run.",
                        self.key)
                    break
            if i < attempts - 1:
                time.sleep(retry_sleep)
                self.reload()
        else:
            # Loop exhausted without breaking: lock is genuinely held.
            log.warning("Lock for task '%s' is taken.", self.key)
            raise PeriodicTaskLockTakenError()
        self.lock = self.Lock()
        self.save()

    def release_lock(self):
        """Release the run lock, unless someone else broke/replaced it."""
        lock_id = self.lock.id
        self.reload()
        if not self.lock or lock_id != self.lock.id:
            log.error(
                "Someone broke our lock for task '%s' since we "
                "acquired it!", self.key)
            return
        self.lock = None
        self.save()

    @contextlib.contextmanager
    def task_runner(self, persist=False):
        """Context manager to run periodic tasks that update model state

        This is a context manager, so it is meant to be used in a `with`
        statement, like this:

            with task_runner('unique-task-key'):
                do_something()

        What this does:
        1. Takes care of using locks to prevent concurrent runs of the same
           task.
        2. Tracks last success, last failure, and failure count of this task.
        """
        if not persist:
            self.check_failures_threshold_exceeded()
            self.check_too_soon()
        self.acquire_lock(attempts=60 if persist else 1)
        try:
            yield
        except Exception:
            self.last_failure = datetime.datetime.now()
            self.failures_count += 1
            raise
        else:
            self.last_success = datetime.datetime.now()
            self.failures_count = 0
        finally:
            # NOTE(review): `last_attempt_started` is not a declared field on
            # this document — confirm it is defined on a subclass or mixin.
            self.last_attempt_started = None
            self.save()
            self.release_lock()

    def __str__(self):
        return '%s: %s' % (self.__class__.__name__, self.id)
class MongoDocument(mongoengine.Document):
    """Minimal document with a single DateTimeField (collection name
    suggests it backs a DateTimeField integration test)."""
    meta = {'collection': 'IntegrationTestDateTimeField'}
    date = mongoengine.DateTimeField()
class UserProfile(me.Document):
    """ User profile model which has other social data and related user info """
    first_name = me.StringField(max_length=30)
    last_name = me.StringField(max_length=30)
    # OAuth payloads stored verbatim; pre_save expects an 'email' key in each.
    google = me.DictField()
    github = me.DictField()
    avatar = me.URLField()
    facebook_handle = me.StringField(max_length=100)
    twitter_handle = me.StringField(max_length=100)
    linkedin_handle = me.StringField(max_length=100)
    description = me.StringField(max_length=500)
    company = me.StringField(max_length=100)
    designation = me.StringField(max_length=100)
    mobile = me.IntField()
    # Callable defaults: evaluated per document at creation time.
    created_at = me.DateTimeField(default=datetime.utcnow)
    updated_at = me.DateTimeField(default=datetime.utcnow)
    created_by = me.ReferenceField('User', required=False)
    updated_by = me.ReferenceField('User', required=False)
    is_active = me.BooleanField(default=True)

    def __str__(self):
        # NOTE(review): join raises TypeError if either name is unset (None),
        # since neither field is required — confirm callers guarantee both.
        return ' '.join([self.first_name, self.last_name])

    def __init__(self, *args, **kwargs):
        super(UserProfile, self).__init__(*args, **kwargs)

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """mongoengine pre_save signal handler: duplicate-account and
        max-length validation before the document is written."""
        # NOTE(review): on re-saving an existing profile this count includes
        # the document itself, so the duplicate check would fire — confirm
        # this handler only runs for new documents.
        if document.google:
            if UserProfile.objects.filter(
                    google__email=document.google['email']).count() > 0:
                raise ValidationError('This account is already registered.')
        if document.github:
            if UserProfile.objects.filter(
                    github__email=document.github['email']).count() > 0:
                raise ValidationError('This account is already registered.')
        # Length checks mirror the field max_length declarations above.
        if document.description and len(document.description) > 500:
            raise ValidationError('Description exceeds 500 characters')
        if document.facebook_handle and len(document.facebook_handle) > 100:
            raise ValidationError('Facebook id exceeds 100 characters')
        if document.twitter_handle and len(document.twitter_handle) > 100:
            raise ValidationError('Twitter id exceeds 100 characters')
        if document.linkedin_handle and len(document.linkedin_handle) > 100:
            raise ValidationError('Linked-in id exceeds 100 characters')
        if document.company and len(document.company) > 100:
            raise ValidationError('Company exceeds 100 characters')
        if document.designation and len(document.designation) > 100:
            raise ValidationError('Designation exceeds 100 characters')
        if document.first_name and len(document.first_name) > 30:
            raise ValidationError('First name exceeds 30 characters')
        if document.last_name and len(document.last_name) > 30:
            raise ValidationError('Last name exceeds 30 characters')
        if document.mobile and len(str(document.mobile)) > 13:
            raise ValidationError('Mobile number exceeds 13 characters')

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        """mongoengine post_save signal handler: backfill first/last name
        from the google or github payload when both are empty."""
        if not document.first_name and not document.last_name:
            first_name = last_name = ''
            if document.google:
                if 'first_name' in document.google.keys():
                    first_name = document.google['first_name']
                if 'last_name' in document.google.keys():
                    last_name = document.google['last_name']
            if document.github:
                # GitHub exposes a single display name; split on the first
                # space, tolerating a missing surname or a missing key.
                try:
                    first_name, last_name = document.github['name'].split(' ')
                except ValueError:
                    first_name, last_name = document.github['name'], ''
                except KeyError:
                    pass
            document.update(set__first_name=first_name,
                            set__last_name=last_name)

    def save(self, *args, **kwargs):
        # Reload after write so in-memory state matches the database.
        super(UserProfile, self).save(*args, **kwargs)
        self.reload()

    def update(self, *args, **kwargs):
        super(UserProfile, self).update(*args, **kwargs)
        self.reload()

    def to_json(self, fields=None, exclude=None):
        # Delegates serialisation to the project's json_dumper helper.
        return json_dumper(self, fields, exclude)
class PollingSchedule(ShardedScheduleMixin, me.Document):
    """Base document for a celerybeat-mongo polling schedule whose effective
    interval is the minimum of a default interval and any unexpired
    overrides."""
    meta = {
        'allow_inheritance': True,
        'strict': False,
        'indexes': ['shard_id']
    }

    # We use a unique name for easy identification and to avoid running the
    # same schedule twice. The name is autopopulated during the invocation of
    # the `clean` method.
    name = me.StringField(unique=True)

    # The following fields are defined in celerybeatmongo.models.PeriodicTask.
    # Here, we define no fields in the base class, and expect subclasses to
    # either define their fields, or simply use properties.
    # task = me.StringField(required=True)
    # args = me.ListField()
    # kwargs = me.DictField()

    # Scheduling information. Don't edit them directly, just use the model
    # methods.
    default_interval = me.EmbeddedDocumentField(
        PollingInterval, required=True, default=PollingInterval(every=0))
    override_intervals = me.EmbeddedDocumentListField(PollingInterval)

    # Optional arguments.
    queue = me.StringField()
    exchange = me.StringField()
    routing_key = me.StringField()
    soft_time_limit = me.IntField()

    # Used internally by the scheduler.
    last_run_at = me.DateTimeField()
    total_run_count = me.IntField(min_value=0)
    run_immediately = me.BooleanField()

    def get_name(self):
        """Construct name based on self.task"""
        # In this base class `self.task` raises NotImplementedError; the
        # fallback keeps `clean()` usable on the base.
        try:
            return self.task.split('.')[-1]
        except NotImplementedError:
            return '%s: No task specified.' % self.__class__.__name__

    def clean(self):
        """Automatically set value of name"""
        self.name = self.get_name()

    @property
    def task(self):
        """Return task name for this schedule

        Subclasses should define an attribute, property or field to do this.
        """
        raise NotImplementedError()

    @property
    def args(self):
        """Return task args for this schedule"""
        return [str(self.id)]

    @property
    def kwargs(self):
        """Return task kwargs for this schedule"""
        return {}

    @property
    def enabled(self):
        """Whether this task is currently enabled or not"""
        # A zero interval produces a falsy timedelta, i.e. disabled.
        return bool(self.interval.timedelta)

    @property
    def interval(self):
        """Merge multiple intervals into one

        Returns a dynamic PollingInterval, with the highest frequency of any
        override schedule or the default schedule.
        """
        interval = self.default_interval
        for i in self.override_intervals:
            if not i.expired():
                if not interval.timedelta or i.timedelta < interval.timedelta:
                    interval = i
        return interval

    @property
    def schedule(self):
        """Return a celery schedule instance

        This is used internally by celerybeatmongo scheduler
        """
        return celery.schedules.schedule(self.interval.timedelta)

    @property
    def expires(self):
        return None

    def add_interval(self, interval, ttl=300, name=''):
        """Add an override schedule to the scheduled task

        Override schedules must define an interval in seconds, as well as a
        TTL (time to live), also in seconds. Override schedules cannot be
        removed, so short TTL's should be used. You can however add a new
        override schedule again, thus practically extending the time where
        an override is in effect.

        Override schedules can only increase, not decrease frequency of the
        schedule, in relation to that define in the `default_interval`.
        """
        assert isinstance(interval, int) and interval > 0
        assert isinstance(ttl, int) and 0 < ttl < 3600
        expires = datetime.datetime.now() + datetime.timedelta(seconds=ttl)
        self.override_intervals.append(
            PollingInterval(name=name, expires=expires, every=interval))

    def cleanup_expired_intervals(self):
        """Remove override schedules that have expired"""
        self.override_intervals = [
            override for override in self.override_intervals
            if not override.expired()
        ]

    def set_default_interval(self, interval):
        """Set default interval

        This is the interval used for this schedule, if there is no active
        override schedule with a smaller interval. The default interval
        never expires. To disable a task, simply set `enabled` equal to
        False.
        """
        self.default_interval = PollingInterval(name='default', every=interval)

    def __unicode__(self):
        return "%s %s" % (self.get_name(), self.interval or '(no interval)')
class Schedule(me.Document, ConditionalClassMixin):
    """Abstract base class for every schedule attr mongoengine model.

    This model is based on celery periodic task and creates defines the fields
    common to all schedules of all types. For each different schedule type, a
    subclass should be created adding any schedule specific fields and methods.

    Documents of all Schedule subclasses will be stored on the same mongo
    collection.

    One can perform a query directly on Schedule to fetch all cloud types,
    like this:

        Schedule.objects(owner=owner).count()
    """

    condition_resource_cls = Machine

    meta = {
        'collection': 'schedules',
        'allow_inheritance': True,
        'indexes': [
            {
                'fields': ['owner', 'name', 'deleted'],
                'sparse': False,
                'unique': True,
                'cls': False,
            },
        ],
    }

    id = me.StringField(primary_key=True, default=lambda: uuid4().hex)
    name = me.StringField(required=True)
    description = me.StringField()
    deleted = me.DateTimeField()
    owner = me.ReferenceField(Organization, required=True)

    # celery periodic task specific fields
    queue = me.StringField()
    exchange = me.StringField()
    routing_key = me.StringField()
    soft_time_limit = me.IntField()

    # mist specific fields
    schedule_type = me.EmbeddedDocumentField(BaseScheduleType, required=True)
    task_type = me.EmbeddedDocumentField(BaseTaskType, required=True)

    # celerybeat-mongo specific fields
    expires = me.DateTimeField()
    start_after = me.DateTimeField()
    task_enabled = me.BooleanField(default=False)
    run_immediately = me.BooleanField()
    last_run_at = me.DateTimeField()
    total_run_count = me.IntField(min_value=0, default=0)
    max_run_count = me.IntField(min_value=0, default=0)

    no_changes = False

    def __init__(self, *args, **kwargs):
        # FIXME
        import mist.api.schedules.base
        super(Schedule, self).__init__(*args, **kwargs)
        self.ctl = mist.api.schedules.base.BaseController(self)

    def owner_query(self):
        return me.Q(cloud__in=Cloud.objects(owner=self.owner).only('id'))

    @classmethod
    def add(cls, auth_context, name, **kwargs):
        """Add schedule

        This is a class method, meaning that it is meant to be called on the
        class itself and not on an instance of the class.

        You're not meant to be calling this directly, but on a schedule class
        instead like this:

            schedule = Schedule.add(owner=owner, **kwargs)
        """
        owner = auth_context.owner
        if not name:
            raise RequiredParameterMissingError('name')
        if not owner or not isinstance(owner, Organization):
            raise BadRequestError('owner')
        if Schedule.objects(owner=owner, name=name, deleted=None):
            raise ScheduleNameExistsError()
        schedule = cls(owner=owner, name=name)
        schedule.ctl.set_auth_context(auth_context)
        schedule.ctl.add(**kwargs)
        return schedule

    @property
    def schedule(self):
        if self.schedule_type:
            return self.schedule_type.schedule
        else:
            raise Exception("must define interval, crontab, one_off schedule")

    @property
    def args(self):
        # Skip terminated machines; they can't run scheduled actions.
        mids = [
            machine.id for machine in self.get_resources()
            if machine.state != 'terminated'
        ]
        fire_up = self.task_type.args
        return [self.owner.id, fire_up, self.name, mids]

    @property
    def kwargs(self):
        return {}

    @property
    def task(self):
        return self.task_type.task

    @property
    def enabled(self):
        # A schedule only runs when it is not deleted, still targets at least
        # one resource, hasn't expired, and hasn't hit its max run count.
        if self.deleted:
            return False
        if not self.get_resources().count():
            return False
        if self.expires and self.expires < datetime.datetime.now():
            return False
        # if self.start_after and self.start_after < datetime.datetime.now():
        #     return False
        if self.max_run_count and (
                (self.total_run_count or 0) >= self.max_run_count):
            return False
        else:
            return self.task_enabled

    def __unicode__(self):
        fmt = '{0.name}: {{no schedule}}'
        if self.schedule_type:
            fmt = 'name: {0.name} type: {0.schedule_type._cls}'
        else:
            raise Exception("must define interval or crontab schedule")
        return fmt.format(self)

    def validate(self, clean=True):
        """
        Override mongoengine validate. We should validate crontab entry.

        Use crontab_parser for crontab expressions. The parser is a general
        purpose one, useful for parsing hours, minutes and day_of_week
        expressions.

        example for minutes:
            minutes = crontab_parser(60).parse('*/15')
            [0, 15, 30, 45]
        """
        if isinstance(self.schedule_type, Crontab):
            cronj_entry = self.schedule_type.as_dict()
            try:
                for k, v in cronj_entry.items():
                    if k == 'minute':
                        celery.schedules.crontab_parser(60).parse(v)
                    elif k == 'hour':
                        celery.schedules.crontab_parser(24).parse(v)
                    elif k == 'day_of_week':
                        celery.schedules.crontab_parser(7).parse(v)
                    elif k == 'day_of_month':
                        celery.schedules.crontab_parser(31, 1).parse(v)
                    elif k == 'month_of_year':
                        celery.schedules.crontab_parser(12, 1).parse(v)
                    else:
                        raise me.ValidationError(
                            'You should provide valid period of time')
            except celery.schedules.ParseException:
                raise me.ValidationError('Crontab entry is not valid')
            except Exception as exc:
                # BUG FIX: `exc.message` is Python 2 only; exceptions have no
                # `.message` attribute on Python 3.
                raise me.ValidationError('Crontab entry is not valid:%s'
                                         % str(exc))
        # BUG FIX: pass the caller's `clean` flag through instead of
        # hard-coding clean=True, which silently ignored the parameter.
        super(Schedule, self).validate(clean=clean)

    def delete(self):
        super(Schedule, self).delete()
        Tag.objects(resource=self).delete()
        self.owner.mapper.remove(self)

    def as_dict(self):
        # Return a dict as it will be returned to the API
        last_run = '' if self.total_run_count == 0 else str(self.last_run_at)

        conditions = [condition.as_dict() for condition in self.conditions]

        sdict = {
            'id': self.id,
            'name': self.name,
            'description': self.description or '',
            # BUG FIX: `unicode()` does not exist on Python 3; str() is the
            # equivalent here.
            'schedule': str(self.schedule_type),
            'schedule_type': self.schedule_type.type,
            'schedule_entry': self.schedule_type.as_dict(),
            'task_type': str(self.task_type),
            'expires': str(self.expires or ''),
            'start_after': str(self.start_after or ''),
            'task_enabled': self.task_enabled,
            'active': self.enabled,
            'run_immediately': self.run_immediately or '',
            'last_run_at': last_run,
            'total_run_count': self.total_run_count,
            'max_run_count': self.max_run_count,
            'conditions': conditions,
        }
        return sdict
class ResetPasswordToken(gj.Document):
    """Single-use password-reset token with an expiry timestamp."""
    token = mongo.StringField(required=True)
    # Flipped to True once the token has been redeemed.
    used = mongo.BooleanField(default=False)
    # utc_right_now is passed as a callable, so it is evaluated per document.
    expiry_time = mongo.DateTimeField(default=utc_right_now)
    meta = {'auto_create_index': False}
class Machine(OwnershipMixin, me.Document):
    """The basic machine model"""

    id = me.StringField(primary_key=True, default=lambda: uuid.uuid4().hex)

    cloud = me.ReferenceField('Cloud', required=True)
    owner = me.ReferenceField('Organization', required=True)
    location = me.ReferenceField('CloudLocation', required=False)
    size = me.ReferenceField('CloudSize', required=False)
    network = me.ReferenceField('Network', required=False)
    subnet = me.ReferenceField('Subnet', required=False)
    name = me.StringField()

    # Info gathered mostly by libcloud (or in some cases user input).
    # Be more specific about what this is.
    # We should perhaps come up with a better name.
    machine_id = me.StringField(required=True)

    hostname = me.StringField()
    public_ips = me.ListField()
    private_ips = me.ListField()
    ssh_port = me.IntField(default=22)
    OS_TYPES = ('windows', 'coreos', 'freebsd', 'linux', 'unix')
    os_type = me.StringField(default='unix', choices=OS_TYPES)
    rdp_port = me.IntField(default=3389)
    actions = me.EmbeddedDocumentField(Actions, default=lambda: Actions())
    extra = me.DictField()
    cost = me.EmbeddedDocumentField(Cost, default=lambda: Cost())
    image_id = me.StringField()

    # libcloud.compute.types.NodeState
    state = me.StringField(default='unknown',
                           choices=('running', 'starting', 'rebooting',
                                    'terminated', 'pending', 'unknown',
                                    'stopping', 'stopped', 'suspended',
                                    'error', 'paused', 'reconfiguring'))
    machine_type = me.StringField(default='machine',
                                  choices=('machine', 'vm', 'container',
                                           'hypervisor', 'container-host'))
    parent = me.ReferenceField('Machine', required=False)

    # We should think this through a bit.
    key_associations = me.EmbeddedDocumentListField(KeyAssociation)

    last_seen = me.DateTimeField()
    missing_since = me.DateTimeField()
    unreachable_since = me.DateTimeField()
    created = me.DateTimeField()

    monitoring = me.EmbeddedDocumentField(Monitoring,
                                          default=lambda: Monitoring())

    ssh_probe = me.EmbeddedDocumentField(SSHProbe, required=False)
    ping_probe = me.EmbeddedDocumentField(PingProbe, required=False)

    # Number of vCPUs gathered from various sources. This field is meant to
    # be updated ONLY by the mist.api.metering.tasks:find_machine_cores task.
    cores = me.IntField()

    meta = {
        'collection': 'machines',
        'indexes': [{
            'fields': ['cloud', 'machine_id'],
            'sparse': False,
            'unique': True,
            'cls': False,
        }, {
            'fields': ['monitoring.installation_status.activated_at'],
            'sparse': True,
            'unique': False
        }],
        'strict': False,
    }

    def __init__(self, *args, **kwargs):
        super(Machine, self).__init__(*args, **kwargs)
        self.ctl = MachineController(self)

    def clean(self):
        # Remove any KeyAssociation, whose `keypair` has been deleted. Do NOT
        # perform an atomic update on self, but rather remove items from the
        # self.key_associations list by iterating over it and popping matched
        # embedded documents in order to ensure that the most recent list is
        # always processed and saved.
        for ka in reversed(range(len(self.key_associations))):
            if self.key_associations[ka].keypair.deleted:
                self.key_associations.pop(ka)
        # Populate owner field based on self.cloud.owner
        if not self.owner:
            self.owner = self.cloud.owner
        self.clean_os_type()
        if self.monitoring.method not in config.MONITORING_METHODS:
            self.monitoring.method = config.DEFAULT_MONITORING_METHOD

    def clean_os_type(self):
        """Clean self.os_type"""
        if self.os_type not in self.OS_TYPES:
            # Normalise case-variants to the canonical choice; fall back to
            # 'unix' when nothing matches.
            for os_type in self.OS_TYPES:
                if self.os_type.lower() == os_type:
                    self.os_type = os_type
                    break
            else:
                self.os_type = 'unix'

    def delete(self):
        super(Machine, self).delete()
        mist.api.tag.models.Tag.objects(resource=self).delete()
        try:
            self.owner.mapper.remove(self)
        except (AttributeError, me.DoesNotExist) as exc:
            log.error(exc)
        try:
            if self.owned_by:
                self.owned_by.get_ownership_mapper(self.owner).remove(self)
        except (AttributeError, me.DoesNotExist) as exc:
            log.error(exc)

    def as_dict(self):
        # Return a dict as it will be returned to the API

        # tags as a list return for the ui
        tags = {
            tag.key: tag.value
            for tag in mist.api.tag.models.Tag.objects(
                resource=self).only('key', 'value')
        }
        # Optimize tags data structure for js...
        # BUG FIX: `iteritems()` is Python 2 only and raised AttributeError
        # under Python 3. The previous `isinstance(tags, dict)` guard was
        # also always true here (tags was just built as a dict), so the
        # conversion is now unconditional with `.items()`.
        tags = [{
            'key': key,
            'value': value
        } for key, value in tags.items()]
        return {
            'id': self.id,
            'hostname': self.hostname,
            'public_ips': self.public_ips,
            'private_ips': self.private_ips,
            'name': self.name,
            'ssh_port': self.ssh_port,
            'os_type': self.os_type,
            'rdp_port': self.rdp_port,
            'machine_id': self.machine_id,
            'actions': {action: self.actions[action]
                        for action in self.actions},
            'extra': self.extra,
            'cost': self.cost.as_dict(),
            'image_id': self.image_id,
            'state': self.state,
            'tags': tags,
            'monitoring':
                self.monitoring.as_dict() if self.monitoring and
                self.monitoring.hasmonitoring else '',
            'key_associations': [ka.as_dict()
                                 for ka in self.key_associations],
            'cloud': self.cloud.id,
            'location': self.location.id if self.location else '',
            'size': self.size.name if self.size else '',
            'cloud_title': self.cloud.title,
            'last_seen': str(self.last_seen.replace(tzinfo=None)
                             if self.last_seen else ''),
            'missing_since': str(self.missing_since.replace(tzinfo=None)
                                 if self.missing_since else ''),
            'unreachable_since': str(
                self.unreachable_since.replace(tzinfo=None)
                if self.unreachable_since else ''),
            'created': str(self.created.replace(tzinfo=None)
                           if self.created else ''),
            'machine_type': self.machine_type,
            'parent_id': self.parent.id if self.parent is not None else '',
            'probe': {
                'ping': (self.ping_probe.as_dict()
                         if self.ping_probe is not None
                         else PingProbe().as_dict()),
                'ssh': (self.ssh_probe.as_dict()
                        if self.ssh_probe is not None
                        else SSHProbe().as_dict()),
            },
            'cores': self.cores,
            'network': self.network.id if self.network else '',
            'subnet': self.subnet.id if self.subnet else '',
            'owned_by': self.owned_by.id if self.owned_by else '',
            'created_by': self.created_by.id if self.created_by else '',
        }

    def __str__(self):
        return 'Machine %s (%s) in %s' % (self.name, self.id, self.cloud)
class Lock(me.EmbeddedDocument):
    """Embedded lock record: a random identifier plus the time it was taken."""
    id = me.StringField(default=lambda: uuid.uuid4().hex)
    # BUG FIX: default must be the callable itself, not its result.
    # `datetime.datetime.now()` was evaluated once at class-definition time,
    # so every Lock instance shared the same stale timestamp.
    created_at = me.DateTimeField(default=datetime.datetime.now)
class News(me.Document):
    """A news item: short title, longer body, publication timestamp."""
    # Length bounds are enforced by mongoengine on validation.
    title = me.StringField(min_length=2, max_length=512)
    body = me.StringField(min_length=10, max_length=4096)
    # Callable default: evaluated per document at creation time.
    pub_date = me.DateTimeField(default=datetime.datetime.now)
class Users(medb.Document):
    """User account document with unique name and email."""
    name = medb.StringField(required=True,max_length=50,unique = True)
    email = medb.EmailField(required=True,max_length=30,unique= True)
    # NOTE(review): no hashing is visible here — confirm callers store a
    # password hash rather than plaintext.
    password = medb.StringField(required=True)
    # utcnow is passed as a callable, so it is evaluated per document.
    datecreated = medb.DateTimeField(default=datetime.datetime.utcnow)
class User(mongo.Document):
    """Site-scoped user account with roles, permissions and geocoded address.

    Documents are filtered to the current ``settings.SITE_ID`` by the default
    ``objects`` manager; use ``all`` for a cross-site queryset.
    """

    meta = {'queryset_class': CustomUserQuerySet, 'allow_inheritance': True}

    full_name = mongo.StringField(verbose_name=_('full name'), max_length=30)
    email = mongo.EmailField(verbose_name=_('email address'), required=True,
                             unique=True)
    password = mongo.StringField(max_length=128, verbose_name=_('password'))
    last_login = mongo.DateTimeField(verbose_name=_('last login'),
                                     default=timezone.now)
    is_staff = mongo.BooleanField(default=False)
    is_active = mongo.BooleanField(default=True)
    is_superuser = mongo.BooleanField(default=False)
    site_id = mongo.IntField(required=True, default=settings.SITE_ID)
    current_role = mongo.StringField(required=False)
    image = mongo.StringField(required=False)
    user_types = mongo.ListField(mongo.ReferenceField(UserTypes),
                                 required=True,
                                 verbose_name=_('User Types'))
    primary_user_type = mongo.ReferenceField(
        UserTypes, required=False, verbose_name=_('Primary User Type'))
    organization = mongo.ReferenceField(Organization, required=False,
                                        verbose_name=_('Organization'))
    custom_field_form = mongo.ReferenceField("FormSchema", required=False)
    activation_key = mongo.StringField(required=False)
    key_expires = mongo.DateTimeField(required=False)
    ban = mongo.BooleanField(default=False)
    ban_reason = mongo.StringField(max_length=255, required=False)
    address = mongo.StringField(max_length=255, required=False)
    city = mongo.StringField(max_length=255, required=False)
    state = mongo.StringField(max_length=255, required=False)
    zip = mongo.StringField(max_length=50, required=False)
    phone = mongo.StringField(max_length=20, required=False)
    description = mongo.StringField(required=False)
    point = mongo.PointField(required=False)
    # used for dashboard resource reference
    dashboard_resource_id = mongo.StringField(max_length=24, required=False)

    # TODO disabled for now, either needs to become a custom form, or hard
    # coded like the address above
    # billing_phone = mongo.StringField(max_length=20, required=False, verbose_name=_('Billing Phone'))
    # shipping_phone = mongo.StringField(max_length=20, required=False, verbose_name=_('Shipping Phone'))
    # same_address = mongo.BooleanField(default=False, verbose_name=_('Is shipping adddress same as billing address'))
    # address_verified = mongo.BooleanField(default=False, verbose_name=_('Address verified by TaxCloud'))

    # BUG FIX: str(datetime.now()) was evaluated once at import time, so every
    # new document shared the same frozen "creation" string. Wrapping it in a
    # lambda defers evaluation to each document's instantiation.
    created = mongo.StringField(required=True,
                                default=lambda: str(datetime.now()))
    updated = mongo.StringField(required=True,
                                default=lambda: str(datetime.now()))

    USERNAME_FIELD = 'email'
    REQUIRED_FIELDS = ['full_name']

    @mongo.queryset.queryset_manager
    def objects(doc_cls, queryset):
        """Default manager: only users belonging to the current site."""
        # This may actually also be done by defining a default ordering for
        # the document, but this illustrates the use of manager methods
        return queryset.filter(site_id=settings.SITE_ID)

    @mongo.queryset.queryset_manager
    def all(doc_cls, queryset):
        """Unrestricted manager: all users regardless of site."""
        return queryset.all()

    def save(self, *args, **kwargs):
        """Stamp ``updated``, geocode a complete address, then persist.

        After the first save, ensures a primary user type is assigned.
        """
        self.updated = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        if self.address and self.zip and self.city and self.state:
            address = (self.address + ", " + self.zip, self.city, self.state)
            geocoded_addr = verify_address(address)
            # verify_address signals failure with the literal string 'Error'.
            if geocoded_addr != 'Error':
                self.point = geocoded_addr
        super(User, self).save(*args, **kwargs)
        if not self.primary_user_type:
            self.set_primary_user_type()

    def set_current_role(self, role):
        """Set and persist the user's active role name."""
        self.current_role = role
        self.save()

    def set_primary_user_type(self, typ=None):
        """Assign a primary user type (explicit, Admin if held, else first).

        :param typ: optional user-type *name* to look up and assign
        """
        admin = UserTypes.objects.get(name='Admin')
        if typ:
            typ = UserTypes.objects.get(name=typ)
        elif admin in self.user_types:
            typ = admin
        else:
            typ = self.user_types[0]
        self.primary_user_type = typ
        # Site 1 users always act as the global admin role.
        if self.site_id == 1:
            self.current_role = 'maps-admin'
        else:
            self.current_role = typ.name
        self.save()

    # def is_provider(self):
    #     return len(self.user_types.filter(type='provider')) > 0

    def get_username(self):
        "Return the identifying username for this User"
        return getattr(self, self.USERNAME_FIELD)

    def __unicode__(self):
        return self.get_username()

    def natural_key(self):
        """Serialization key: the username tuple."""
        return (self.get_username(), )

    def is_anonymous(self):
        """
        Always returns False. This is a way of comparing User objects to
        anonymous users.
        """
        return False

    def is_authenticated(self):
        """
        Always return True. This is a way to tell if the user has been
        authenticated in templates.
        """
        return True

    def set_password(self, raw_password):
        """Hash ``raw_password`` and store it (does not save)."""
        self.password = make_password(raw_password)

    def check_password(self, raw_password):
        """
        Returns a boolean of whether the raw_password was correct. Handles
        hashing formats behind the scenes.
        """
        def setter(raw_password):
            # Re-hash on successful check if the stored format is outdated.
            self.set_password(raw_password)
            self.save(update_fields=["password"])
        return check_password(raw_password, self.password, setter)

    def set_unusable_password(self):
        # Sets a value that will never be a valid hash
        self.password = make_password(None)

    def has_usable_password(self):
        """True when the stored password is a checkable hash."""
        return is_password_usable(self.password)

    def get_session_auth_hash(self):
        """
        Returns an HMAC of the password field.
        """
        key_salt = "maps.users.models.AbstractBaseUser.get_session_auth_hash"
        return salted_hmac(key_salt, self.password).hexdigest()

    def email_user(self, subject, message, from_email=None, **kwargs):
        """
        Sends an email to this User.
        """
        send_mail(subject, message, from_email, [self.email], **kwargs)

    @property
    def location_count(self):
        """Number of Locations created by this user (0 on bad id)."""
        try:
            return Location.objects(created_by=str(self.id)).count()
        except mongo.ValidationError:
            return 0

    @property
    def image_url(self):
        """Media URL of the profile image, or None when unset."""
        url = None
        if self.image:
            url = settings.MEDIA_URL + self.image
        return url

    def custom_form(self):
        """Serialized custom form schema, as a (possibly empty) list."""
        output = []
        try:
            if self.custom_field_form:
                serializer = FormSchemaIdSerializer(self.custom_field_form)
                output.append(serializer.data)
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # are no longer swallowed; serialization failures still yield [].
        except Exception:
            pass
        return output

    def has_permission(self, obj_name, action):
        """True if any of the user's types grants ``action`` on ``obj_name``."""
        for ut in self.user_types:
            if ut.has_permission(obj_name, action):
                return True
        return False

    def has_edit_other_permission(self, obj_name):
        return self.has_permission(obj_name, '4')

    def has_delete_other_permission(self, obj_name):
        return self.has_permission(obj_name, '5')

    def has_edit_own_permission(self, obj_name):
        return self.has_permission(obj_name, '1')

    def has_delete_own_permission(self, obj_name):
        return self.has_permission(obj_name, '2')

    def can_delete(self, obj, perm_type=None):
        """True if this user may delete ``obj`` (own vs. other rules)."""
        if not perm_type:
            perm_type = obj.permission_type
        if obj.created_by == self.id:
            if self.has_delete_own_permission(perm_type):
                return True
        else:
            if self.has_delete_other_permission(perm_type):
                return True
        return False

    def can_edit(self, obj, perm_type=None):
        """True if this user may edit ``obj`` (own vs. other rules)."""
        if not perm_type:
            perm_type = obj.permission_type
        if obj.created_by == self.id:
            if self.has_edit_own_permission(perm_type):
                return True
        else:
            if self.has_edit_other_permission(perm_type):
                return True
        return False

    @property
    def permission_type(self):
        """Permission namespace for User objects themselves."""
        return "users"
class Job(mongoengine.Document):
    """A container unit of work, made up of one or more tasks."""

    # Status codes and their display labels.
    QUEUED = "QUEUED"
    PROCESSING = "PROCESSING"
    ERRED = "ERRED"
    FINISHED = "FINISHED"
    STATUSES = (
        (QUEUED, "Queued"),
        (PROCESSING, "Processing"),
        (ERRED, "Erred"),
        (FINISHED, "Finished"),
    )

    # Job type codes and their display labels.
    STANDALONE = "STANDALONE"
    FETCHABLE = "FETCHABLE"
    TYPES = ((STANDALONE, "Standalone"), (FETCHABLE, "Fetchable"))

    job_id = mongoengine.StringField(required=True,
                                     primary_key=True,
                                     max_length=MD5_LENGTH,
                                     min_length=MD5_LENGTH)
    status = mongoengine.StringField(choices=STATUSES,
                                     required=True,
                                     default=QUEUED,
                                     help_text="Job status.")
    job_type = mongoengine.StringField(choices=TYPES,
                                       required=True,
                                       help_text="Job type.")
    scheduled = mongoengine.DateTimeField(required=True,
                                          help_text="Start time of the job.")
    updated = mongoengine.DateTimeField(required=True,
                                        help_text="Time of last status update")
    tasks = mongoengine.ListField(mongoengine.ReferenceField(Task),
                                  required=True,
                                  help_text="References to subtasks of job.")
    options = mongoengine.DictField(help_text="Additional(free-form) options.")

    @property
    def is_queued(self):
        """Whether the job is still waiting to be processed."""
        return self.QUEUED == self.status

    @property
    def is_processing(self):
        """Whether the job is currently being worked on."""
        return self.PROCESSING == self.status

    @property
    def is_finished(self):
        """Whether the job completed."""
        return self.FINISHED == self.status

    @property
    def is_erred(self):
        """Whether the job failed."""
        return self.ERRED == self.status

    def save(self, *args, **kwargs):
        """Persist the job, maintaining the ``updated`` timestamp."""
        # First save mirrors the scheduled time; later saves stamp "now" (UTC).
        self.updated = (datetime.datetime.utcnow() if self.updated
                        else self.scheduled)
        return super(Job, self).save(*args, **kwargs)

    @classmethod
    def generate_id(cls, *args):
        """Derive a unique job_id by hashing the passed in arguments."""
        return utils.hash_data(*args)

    @classmethod
    def create(cls, targets, job_type, **kwargs):
        """Create, persist and schedule a job for the given target(s).

        :param targets: Iterable containing URLs or filesystem paths
        :param kwargs: All kwargs are stored as additional options of the job
        :returns: ``Job`` instance
        """
        now = datetime.datetime.utcnow()
        # The id is derived from the creation time plus the targets.
        new_job = cls(job_id=cls.generate_id(now, *list(targets)),
                      job_type=job_type,
                      scheduled=now,
                      options=kwargs)
        new_job.tasks = [Task.create(new_job.job_id, target)
                         for target in targets]
        new_job.save()
        new_job.schedule()
        return new_job

    @classmethod
    def is_valid_type(cls, job_type):
        """Tell whether ``job_type`` is one of the known type codes."""
        return any(code == job_type for code, _label in cls.TYPES)

    def schedule(self):
        """Hand the job over to a background worker for processing."""
        worker.dispatch({'type': self.job_type, 'id': self.job_id})

    def retry(self):
        """Re-queue and re-schedule a previously failed job."""
        self.mark_queued()
        self.schedule()

    def _transition(self, new_status):
        # Persist a status change.
        self.status = new_status
        self.save()

    def mark_queued(self):
        self._transition(self.QUEUED)

    def mark_processing(self):
        self._transition(self.PROCESSING)

    def mark_erred(self):
        self._transition(self.ERRED)

    def mark_finished(self):
        self._transition(self.FINISHED)
class FetchMetaData(mongoengine.Document):
    """Record of one tweet-fetch run: the query, its time, and the results."""

    # Free-form description of the query that was executed.
    query_data = mongoengine.DictField()
    # When the search was performed.
    searched_at = mongoengine.DateTimeField()
    # Tweets returned by this fetch.
    tweets = mongoengine.ListField(mongoengine.ReferenceField(Tweet))
class Task(mongoengine.Document):
    """The smallest unit of work: a single target that must be processed."""

    # Status codes and their display labels.
    QUEUED = "QUEUED"
    PROCESSING = "PROCESSING"
    FAILED = "FAILED"
    FINISHED = "FINISHED"
    STATUSES = (
        (QUEUED, "Queued"),
        (PROCESSING, "Processing"),
        (FAILED, "Failed"),
        (FINISHED, "Finished"),
    )

    meta = {'indexes': ['md5']}

    job_id = mongoengine.StringField(required=True,
                                     max_length=MD5_LENGTH,
                                     min_length=MD5_LENGTH,
                                     help_text="ID of parent Job.")
    target = mongoengine.StringField(required=True,
                                     help_text="Target URL or filesystem path")
    md5 = mongoengine.StringField(max_length=MD5_LENGTH,
                                  min_length=MD5_LENGTH,
                                  help_text="MD5 hexdigest of target.")
    title = mongoengine.StringField(help_text="Processed page title.")
    size = mongoengine.IntField(min_value=0,
                                help_text="Size of page in bytes.")
    images = mongoengine.IntField(min_value=0,
                                  help_text="Number of images on the page.")
    timestamp = mongoengine.DateTimeField(help_text="End time of task.")
    status = mongoengine.StringField(choices=STATUSES,
                                     default=QUEUED,
                                     help_text="Job status.")
    notes = mongoengine.StringField(help_text="Arbitary information")

    @classmethod
    def create(cls, job_id, target):
        """Create and persist a task for ``target``.

        :param job_id: The string ID of the parent job instance
        :param target: The target URL or filesystem path
        :returns: ``Task`` instance
        """
        new_task = cls(job_id=job_id, target=target)
        new_task.save()
        return new_task

    @property
    def is_queued(self):
        """Whether the task is still waiting to be processed."""
        return self.QUEUED == self.status

    @property
    def is_processing(self):
        """Whether the task is currently being worked on."""
        return self.PROCESSING == self.status

    @property
    def is_finished(self):
        """Whether the task completed."""
        return self.FINISHED == self.status

    @property
    def is_failed(self):
        """Whether the task failed."""
        return self.FAILED == self.status

    @property
    def download_link(self):
        """URL from which the produced zipball can be fetched."""
        template = settings.BOTTLE_CONFIG['artexin.zipball_url_template']
        return template.format(self.md5)

    @property
    def zipball_path(self):
        """Filesystem location of the produced zipball."""
        return os.path.join(settings.BOTTLE_CONFIG['artexin.out_dir'],
                            '{0}.zip'.format(self.md5))

    def _transition(self, new_status, notes=None):
        # Persist a status change, optionally updating the notes field.
        self.status = new_status
        if notes is not None:
            self.notes = notes
        self.save()

    def mark_queued(self):
        self._transition(self.QUEUED)

    def mark_processing(self):
        self._transition(self.PROCESSING)

    def mark_failed(self, reason):
        self._transition(self.FAILED, notes=reason)

    def mark_finished(self):
        self._transition(self.FINISHED, notes='')
class MUserSearch(mongo.Document):
    '''Search index state of a user's subscriptions.'''

    user_id = mongo.IntField(unique=True)
    last_search_date = mongo.DateTimeField()
    # True once all of the user's feeds are in the search index.
    subscriptions_indexed = mongo.BooleanField()
    # True while a background indexing run is in flight.
    subscriptions_indexing = mongo.BooleanField()

    meta = {
        'collection': 'user_search',
        'indexes': ['user_id'],
        'allow_inheritance': False,
    }

    @classmethod
    def get_user(cls, user_id, create=True):
        """Fetch (from primary) or optionally create the state doc for a user."""
        try:
            user_search = cls.objects.read_preference(pymongo.ReadPreference.PRIMARY)\
                .get(user_id=user_id)
        except cls.DoesNotExist:
            if create:
                user_search = cls.objects.create(user_id=user_id)
            else:
                user_search = None
        return user_search

    def touch_search_date(self):
        """Record a search; kick off first-time indexing when needed."""
        if not self.subscriptions_indexed and not self.subscriptions_indexing:
            self.schedule_index_subscriptions_for_search()
            self.subscriptions_indexing = True
        self.last_search_date = datetime.datetime.now()
        self.save()

    def schedule_index_subscriptions_for_search(self):
        """Queue the full-subscription indexing celery task."""
        IndexSubscriptionsForSearch.apply_async(
            kwargs=dict(user_id=self.user_id),
            queue='search_indexer')

    # Should be run as a background task
    def index_subscriptions_for_search(self):
        """Index all of the user's feeds via a celery chord of chunk tasks."""
        from apps.rss_feeds.models import Feed
        from apps.reader.models import UserSubscription

        SearchStory.create_elasticsearch_mapping()

        start = time.time()
        user = User.objects.get(pk=self.user_id)
        r = redis.Redis(connection_pool=settings.REDIS_PUBSUB_POOL)
        # Notify listeners (UI) that indexing started.
        r.publish(user.username, 'search_index_complete:start')

        subscriptions = UserSubscription.objects.filter(user=user).only('feed')
        total = subscriptions.count()

        feed_ids = []
        for sub in subscriptions:
            try:
                feed_ids.append(sub.feed.pk)
            except Feed.DoesNotExist:
                # Dangling subscription reference; skip it.
                continue

        # Chunks of 6 feeds per celery subtask.
        feed_id_chunks = [c for c in chunks(feed_ids, 6)]
        logging.user(
            user, "~FCIndexing ~SB%s feeds~SN in %s chunks..."
            % (total, len(feed_id_chunks)))

        search_chunks = [
            IndexSubscriptionsChunkForSearch.s(
                feed_ids=feed_id_chunk,
                user_id=self.user_id).set(queue='search_indexer')
            for feed_id_chunk in feed_id_chunks
        ]
        # Chord: run all chunks, then the finish callback once they complete.
        callback = FinishIndexSubscriptionsForSearch.s(
            user_id=self.user_id, start=start).set(queue='search_indexer')
        celery.chord(search_chunks)(callback)

    def finish_index_subscriptions_for_search(self, start):
        """Chord callback: log timing, notify, and mark indexing complete."""
        from apps.reader.models import UserSubscription

        r = redis.Redis(connection_pool=settings.REDIS_PUBSUB_POOL)
        user = User.objects.get(pk=self.user_id)
        subscriptions = UserSubscription.objects.filter(user=user).only('feed')
        total = subscriptions.count()
        duration = time.time() - start
        logging.user(
            user, "~FCIndexed ~SB%s feeds~SN in ~FM~SB%s~FC~SN sec."
            % (total, round(duration, 2)))
        r.publish(user.username, 'search_index_complete:done')

        self.subscriptions_indexed = True
        self.subscriptions_indexing = False
        self.save()

    def index_subscriptions_chunk_for_search(self, feed_ids):
        """Index one chunk of feeds and publish progress over redis pubsub."""
        from apps.rss_feeds.models import Feed

        r = redis.Redis(connection_pool=settings.REDIS_PUBSUB_POOL)
        user = User.objects.get(pk=self.user_id)
        logging.user(user, "~FCIndexing %s feeds..." % len(feed_ids))

        for feed_id in feed_ids:
            feed = Feed.get_by_id(feed_id)
            if not feed:
                continue
            feed.index_stories_for_search()

        r.publish(
            user.username,
            'search_index_complete:feeds:%s'
            % ','.join([str(f) for f in feed_ids]))

    @classmethod
    def schedule_index_feeds_for_search(cls, feed_ids, user_id):
        """Queue incremental indexing, but only for users already indexed."""
        user_search = cls.get_user(user_id, create=False)
        if (not user_search or
                not user_search.subscriptions_indexed or
                user_search.subscriptions_indexing):
            # User hasn't searched before.
            return

        if not isinstance(feed_ids, list):
            feed_ids = [feed_ids]
        IndexFeedsForSearch.apply_async(kwargs=dict(feed_ids=feed_ids,
                                                    user_id=user_id),
                                        queue='search_indexer')

    @classmethod
    def index_feeds_for_search(cls, feed_ids, user_id):
        """Index the given feeds immediately (invoked by the celery task)."""
        from apps.rss_feeds.models import Feed

        user = User.objects.get(pk=user_id)
        logging.user(user, "~SB~FCIndexing %s~FC by request..." % feed_ids)

        for feed_id in feed_ids:
            feed = Feed.get_by_id(feed_id)
            if not feed:
                continue
            feed.index_stories_for_search()

    @classmethod
    def remove_all(cls, drop_index=False):
        """Remove every user's search state; optionally drop the story index."""
        # You only need to drop the index if there is data you want to clear.
        # A new search server won't need this, as there isn't anything to drop.
        if drop_index:
            logging.info(" ---> ~FRRemoving stories search index...")
            SearchStory.drop()

        user_searches = cls.objects.all()
        logging.info(" ---> ~SN~FRRemoving ~SB%s~SN user searches..." %
                     user_searches.count())
        for user_search in user_searches:
            try:
                user_search.remove()
            except Exception as e:
                print(" ****> Error on search removal: %s" % e)

    def remove(self):
        """Unset search flags on all this user's feeds, then delete this doc."""
        from apps.rss_feeds.models import Feed
        from apps.reader.models import UserSubscription

        user = User.objects.get(pk=self.user_id)
        subscriptions = UserSubscription.objects.filter(user=self.user_id)
        total = subscriptions.count()
        removed = 0

        for sub in subscriptions:
            try:
                feed = sub.feed
            except Feed.DoesNotExist:
                continue
            if not feed.search_indexed:
                continue
            feed.search_indexed = False
            feed.save()
            removed += 1

        logging.user(
            user,
            "~FCRemoved ~SB%s/%s feed's search indexes~SN for ~SB~FB%s~FC~SN."
            % (removed, total, user.username))
        self.delete()
class BonusCardTrx(mge.EmbeddedDocument): trx_id = mge.StringField(required=True) # номер транзакции trx_value = mge.IntField(required=True) # Сколько начислено бонусных единиц(миль) departure_airport = mge.StringField(required=True) # откуда arrival_airport = mge.StringField(required=True) # куда flight_date = mge.DateTimeField(required=True) # дата полёта
class MessageDocument(mongoengine.Document):
    """A stored message with its arrival time."""

    # Message text.
    message = mongoengine.StringField()
    # Callable default: evaluated per-document at creation time.
    timestamp = mongoengine.DateTimeField(default=datetime.datetime.now)
class LogNew(mongoengine.Document):
    """Milk-usage log entry with redundant string and datetime timestamps."""

    milkUsed = mongoengine.StringField()
    timeStr = mongoengine.StringField()
    dateStr = mongoengine.StringField()
    # BUG FIX: both defaults were *called* at import time
    # (datetime.datetime.now() / datetime.date.today()), freezing a single
    # timestamp/date for every document created afterwards. Passing the
    # callables lets mongoengine evaluate them per-document.
    timeStamp = mongoengine.DateTimeField(default=datetime.datetime.now)
    todayStamp = mongoengine.DateTimeField(default=datetime.date.today)
class Project(mongoengine.Document):
    """
    A project is the highest controlling structure of an analysis and houses
    all the experiments, their associated FileGroups and the populations
    contained in each FileGroup and the populations clusters.

    Project can be used to create new experiments and to load existing
    experiments to interact with.

    Attributes
    ----------
    project_id: str, required
        unique identifier for project
    subjects: list
        List of references for associated subjects; see Subject
    start_date: DateTime
        date of creation
    owner: str, required
        user name of owner
    experiments: list
        List of references for associated fcs files
    """
    project_id = mongoengine.StringField(required=True, unique=True)
    # reverse_delete_rule=4 == mongoengine PULL: remove deleted refs from list.
    subjects = mongoengine.ListField(
        mongoengine.ReferenceField(Subject, reverse_delete_rule=4))
    start_date = mongoengine.DateTimeField(default=datetime.datetime.now)
    # BUG FIX: keyword was misspelled "requred", which mongoengine silently
    # ignored -- owner was never actually required despite the docstring.
    owner = mongoengine.StringField(required=True)
    experiments = mongoengine.ListField(
        mongoengine.ReferenceField(Experiment, reverse_delete_rule=4))
    meta = {'db_alias': 'core', 'collection': 'projects'}

    def list_experiments(self) -> Generator:
        """
        Generate a list of associated flow cytometry experiments

        Returns
        -------
        Generator
            list of experiment IDs
        """
        for e in self.experiments:
            yield e.experiment_id

    def load_experiment(self, experiment_id: str) -> Experiment:
        """
        Load the experiment object for a given experiment ID

        Parameters
        ----------
        experiment_id: str
            experiment to load

        Returns
        --------
        Experiment
        """
        assert experiment_id in list(self.list_experiments(
        )), f'Error: no experiment {experiment_id} found'
        return Experiment.objects(experiment_id=experiment_id).get()

    def add_experiment(self,
                       experiment_id: str,
                       data_directory: str,
                       panel_name: str or None = None,
                       panel_definition: str or None = None) -> Experiment:
        """
        Add new experiment to project. Note you must provide either a path
        to an excel template for the panel definition (panel_definition) or
        the name of an existing panel (panel_name). If panel_definition is
        provided, then the panel_name will be used to name the new Panel
        document associated to this experiment. If no panel_name is provided,
        then the panel name will default to "{experiment_id}_panel".

        Parameters
        -----------
        experiment_id: str
            experiment name
        data_directory: str
            Path where experiment events data files will be stored
        panel_name: str (optional)
            Name of panel to associate to experiment
        panel_definition: str (optional)
            Path to excel template for generating the panel

        Returns
        --------
        Experiment
            Newly created FCSExperiment
        """
        err = f'Error: Experiment with id {experiment_id} already exists!'
        assert experiment_id not in list(self.list_experiments()), err
        exp = Experiment(experiment_id=experiment_id,
                         panel_definition=panel_definition,
                         panel_name=panel_name,
                         data_directory=data_directory)
        exp.save()
        self.experiments.append(exp)
        self.save()
        return exp

    def add_subject(self,
                    subject_id: str,
                    drug_data: list or None = None,
                    infection_data: list or None = None,
                    patient_biology: list or None = None,
                    **kwargs) -> Subject:
        """
        Create a new subject and associate to project; a subject is an
        individual element of a study e.g. a patient or a mouse

        Parameters
        -----------
        subject_id: str
            subject ID for the new subject
        drug_data: list, optional
            list of Drug documents to associate to subject
            (see cytopy.data.subject.Drug)
        infection_data: list, optional
            list of Bug documents to associate to subject
            (see cytopy.data.subject.Bug)
        patient_biology: list, optional
            list of Biology documents to associate to subject
            (see cytopy.data.subject.Biology)
        kwargs:
            Additional keyword arguments to pass to Subject initialisation
            (see cytopy.data.subject.Subject)

        Returns
        --------
        Subject
            The newly created, saved Subject
        """
        new_subject = Subject(subject_id=subject_id, **kwargs)
        if drug_data is not None:
            new_subject.drug_data = drug_data
        if infection_data is not None:
            new_subject.infection_data = infection_data
        if patient_biology is not None:
            new_subject.patient_biology = patient_biology
        new_subject.save()
        self.subjects.append(new_subject)
        self.save()
        return new_subject

    def list_subjects(self) -> Generator:
        """
        Generate a list of subject ID for subjects associated to this project

        Returns
        --------
        Generator
            List of subject IDs
        """
        for s in self.subjects:
            yield s.subject_id

    def get_subject(self, subject_id: str) -> Subject:
        """
        Given a subject ID associated to Project, return the Subject document

        Parameters
        -----------
        subject_id: str
            subject ID to pull

        Returns
        --------
        Subject
        """
        assert subject_id in list(self.list_subjects()), \
            f'Invalid subject ID, valid subjects: ' \
            f'{list(self.list_subjects())}'
        return Subject.objects(subject_id=subject_id).get()

    def delete(self, *args, **kwargs) -> None:
        """
        Delete project (wrapper function of mongoengine.Document.delete)

        Parameters
        -----------
        args:
            positional arguments to pass to parent call
            (see mongoengine.Document.delete)
        kwargs:
            keyword arguments to pass to parent call
            (see mongoengine.Document.delete)

        Returns
        --------
        None
        """
        experiments = [
            self.load_experiment(e) for e in list(self.list_experiments())
        ]
        # Tear down every experiment's samples before deleting the experiment.
        for e in experiments:
            samples = e.list_samples()
            for s in samples:
                e.remove_sample(s)
            e.delete()
        for p in self.subjects:
            p.delete()
        super().delete(*args, **kwargs)
class SSHProbe(me.EmbeddedDocument):
    """Snapshot of host facts collected over an SSH probe."""

    uptime = me.FloatField()  # seconds
    loadavg = me.ListField(me.FloatField())  # 1/5/15-minute load averages
    cores = me.IntField()
    users = me.IntField()
    pub_ips = me.ListField(me.StringField())
    priv_ips = me.ListField(me.StringField())
    macs = me.ListField(me.StringField())
    df = me.StringField()  # raw `df` output
    kernel = me.StringField()
    os = me.StringField()
    os_version = me.StringField()
    distro = me.StringField()
    dirty_cow = me.BooleanField()  # vulnerable to CVE-2016-5195 per probe
    updated_at = me.DateTimeField()
    unreachable_since = me.DateTimeField()

    meta = {'strict': False}

    def update_from_dict(self, data):
        """Populate fields from raw probe output.

        Each field is validated independently; invalid values are logged and
        replaced with a neutral default so one bad value never aborts the
        whole update.
        """
        uptime = data.get('uptime')
        try:
            self.uptime = float(uptime)
        except (ValueError, TypeError):
            log.error("Invalid uptime value: %s", uptime)
            self.uptime = 0
        loadavg = data.get('loadavg')
        try:
            # Expect exactly three float-coercible entries.
            assert isinstance(loadavg, list)
            assert len(loadavg) == 3
            for i in range(3):
                loadavg[i] = float(loadavg[i])
            self.loadavg = loadavg
        except Exception as exc:
            log.error("Invalid loadavg '%s': %r", loadavg, exc)
            self.loadavg = []
        for int_attr in ('cores', 'users'):
            val = data.get(int_attr)
            try:
                setattr(self, int_attr, int(val))
            except Exception as exc:
                log.error("Invalid %s '%s': %r", int_attr, val, exc)
                setattr(self, int_attr, 0)
        for strarr_attr in ('pub_ips', 'priv_ips', 'macs'):
            val = data.get(strarr_attr)
            try:
                assert isinstance(val, list)
                # NOTE: `basestring` implies this module targets Python 2.
                assert all(isinstance(item, basestring) for item in val)
                setattr(self, strarr_attr, val)
            except Exception as exc:
                log.error("Invalid %s '%s': %r", strarr_attr, val, exc)
                setattr(self, strarr_attr, [])
        for str_attr in ('df', 'kernel', 'os', 'os_version', 'distro'):
            setattr(self, str_attr, str(data.get(str_attr, '')))
        self.dirty_cow = bool(data.get('dirty_cow'))
        # A successful probe clears any outstanding unreachable marker.
        self.unreachable_since = None
        self.updated_at = datetime.datetime.now()

    def as_dict(self):
        """Serialize to a plain dict; datetimes become naive strings."""
        data = {
            key: getattr(self, key) for key in (
                'uptime', 'loadavg', 'cores', 'users', 'pub_ips', 'priv_ips',
                'df', 'macs', 'kernel', 'os', 'os_version', 'distro',
                'dirty_cow', 'updated_at', 'unreachable_since',
            )
        }
        # Handle datetime objects
        for key in ('updated_at', 'unreachable_since'):
            if data[key]:
                data[key] = str(data[key].replace(tzinfo=None))
        return data
class OccupancyData(me.Document):
    """One occupancy sample: a headcount at a collection time."""

    # When the sample was taken.
    collectedAt = me.DateTimeField(required=True)
    # Number of occupants observed.
    occupancy = me.IntField(required=True)
class PoWWindow(ModelMixin, mg.Document):
    """Tracks per-epoch PoW windows and estimates the next PoW start time."""

    meta = {"collection": "zil_pow_windows", "strict": False}

    create_time = mg.DateTimeField()
    block_num = mg.IntField(required=True)
    # Predicted start of the next PoW window.
    estimated_next_pow = mg.DateTimeField()
    pow_start = mg.DateTimeField(default=datetime.utcnow)
    pow_end = mg.DateTimeField(default=datetime.utcnow)
    # Durations in seconds; 0 means "not yet measured".
    pow_window = mg.FloatField(default=0)
    epoch_window = mg.FloatField(default=0)

    @classmethod
    def get_latest_record(cls):
        """Most recently created window record, or None."""
        return cls.objects().order_by("-create_time").first()

    @classmethod
    def get_latest_block_num(cls):
        """Block number of the latest record, -1 when no records exist."""
        latest_record = cls.get_latest_record()
        if not latest_record:
            return -1
        return latest_record.block_num

    @classmethod
    def get_pow_window(cls, block_num=None):
        """Return (pow_start, pow_end) for a block, computing it on a miss."""
        record = cls.objects(block_num=block_num).order_by("-create_time").first()
        if not record:
            # No stored window: derive it from raw PowWork records.
            return PowWork.calc_pow_window(block_num)
        return record.pow_start, record.pow_end

    @classmethod
    def avg_pow_time(cls, number_blocks=10):
        """ calc pow window from prev records """
        query = cls.objects().order_by("-create_time")
        records = query.limit(number_blocks).all()
        pow_window_list = [r.pow_window for r in records if r.pow_window > 0]
        pow_window_list = sorted(pow_window_list)
        # Trimmed mean: drop min and max when there are enough samples.
        if len(pow_window_list) > 4:
            pow_window_list = pow_window_list[1:-1]
        pow_in_secs = 0
        if len(pow_window_list) > 0:
            pow_in_secs = sum(pow_window_list) / len(pow_window_list)
        return pow_in_secs

    @classmethod
    def avg_epoch_time(cls, number_blocks=10):
        """ calc epoch window( pow included ) from prev records """
        query = cls.objects().order_by("-create_time")
        records = query.limit(number_blocks).all()
        epoch_window_list = [r.epoch_window for r in records if r.epoch_window > 0]
        epoch_window_list = sorted(epoch_window_list)
        # Trimmed mean: drop min and max when there are enough samples.
        if len(epoch_window_list) > 4:
            epoch_window_list = epoch_window_list[1:-1]
        epoch_in_secs = 0
        if len(epoch_window_list) > 0:
            epoch_in_secs = sum(epoch_window_list) / len(epoch_window_list)
        return epoch_in_secs

    @classmethod
    def seconds_to_next_pow(cls):
        """Seconds until the estimated next PoW window; 0 when unknown,
        already past, or when we are currently inside a PoW window."""
        last_record = cls.get_latest_record()
        if not last_record or not last_record.estimated_next_pow:
            return 0
        now = datetime.utcnow()
        next_pow_time = last_record.estimated_next_pow
        if now > next_pow_time:
            logging.warning("we are missing some pow_window records")
            return 0
        if now < last_record.pow_start + timedelta(seconds=last_record.pow_window):
            # we are in current pow window
            return 0
        return (next_pow_time - now).total_seconds()

    @classmethod
    def update_pow_window(cls, work):
        """Maintain window records as new PoW work arrives.

        Finalizes the previous epoch's measured durations and creates a new
        record (with a next-PoW estimate) when the block number advances by 1.
        """
        if not work:
            return
        last_record_num = -1
        last_record = cls.get_latest_record()
        if last_record:
            last_record_num = last_record.block_num
        if work.block_num < last_record_num:
            logging.critical("old record found in zil_pow_windows, "
                             "pls clean the database")
            return
        if work.block_num == last_record_num:
            # pow is ongoing, do nothing
            return
        if work.block_num == last_record_num + 1:
            # new epoch start
            # 1. update prev record
            if last_record:
                pow_start, pow_end = PowWork.calc_pow_window(last_record_num)
                if pow_start and pow_end:
                    pow_window = (pow_end - pow_start).total_seconds()
                    epoch_window = (work.start_time - pow_start).total_seconds()
                    last_record.update(
                        pow_start=pow_start, pow_end=pow_end,
                        pow_window=pow_window, epoch_window=epoch_window
                    )
        # 2. create new record and estimate next pow
        epoch_delta = timedelta(seconds=cls.avg_epoch_time())
        pow_window = last_record.pow_window if last_record else cls.avg_pow_time()
        estimated_next_pow = work.start_time + epoch_delta
        new_record = cls.create(
            block_num=work.block_num,
            create_time=datetime.utcnow(),
            pow_start=work.start_time,
            pow_window=pow_window,
            estimated_next_pow=estimated_next_pow,
        )
        return new_record