def html(self):
    """Return the HTML body of the archived item, or empty bytes when absent."""
    try:
        body = _utils.stream(self._arch_item, PR_HTML)
    except MAPIErrorNotFound:
        return b''
    return body
def rtf(self):
    """Return the compressed-RTF body of the archived item, or empty bytes when absent."""
    try:
        body = _utils.stream(self._arch_item, PR_RTF_COMPRESSED)
    except MAPIErrorNotFound:
        return b''
    return body
def check_stream():
    """Consume the tweet stream and queue other users' tweets for workers.

    Spawns PROCESS_NUM SingTweetProcess workers sharing one JoinableQueue,
    then pushes a (text, id, username) tuple for every decodable tweet whose
    author is not TWITTER_HANDLE.
    """
    queue = multiprocessing.JoinableQueue(10000)
    processes = []
    for i in range(PROCESS_NUM):
        process = SingTweetProcess(queue, i)
        process.start()
        processes.append(process)
    for i in stream():
        logger.debug('Got new data:\n%r', i)
        try:
            tweet = json.loads(i)
            username = tweet.get('user', {}).get('screen_name', None)
            if username is None:
                continue
            if username.lower() == TWITTER_HANDLE:
                # skip my own shit
                continue
            tweet_text = tweet.get('text', None)
            if tweet_text:
                # fall back to a pseudo-random id when id_str is missing
                tweet_id = tweet.get('id_str', str(random.random() * 10000))
                data = (tweet_text, tweet_id, username)
                logger.info('queueing %r', data)
                queue.put(data)
        except Exception as e:  # fix: "except Exception, e" is Python 2-only syntax
            logger.error("unable to decode %r", i)
            logger.error("exception %r", e)
def text(self):
    """Return the plain-text body; empty unicode string when absent."""
    try:
        body = _utils.stream(self._arch_item, PR_BODY_W)  # under windows them be utf-16le?
    except MAPIErrorNotFound:
        return u''
    return body
def Value(self):
    """Lazily stream and cache the property value; None when streaming fails."""
    if self._Value is not None:
        return self._Value
    try:
        self._Value = _utils.stream(self.mapiobj, self.ulPropTag)
    except MAPIErrorNotFound:
        # XXX eg normalized subject streaming broken..?
        self._Value = None
    return self._Value
def eml(self, received_date=False):
    """Return .eml version of item, generating and caching it on first use."""
    if self.emlfile is not None:
        return self.emlfile
    try:
        self.emlfile = _utils.stream(self.mapiobj, PR_EC_IMAP_EMAIL)
    except MAPIErrorNotFound:
        # no stored copy: convert via inetmapi
        sopt = inetmapi.sending_options()
        sopt.no_recipients_workaround = True
        sopt.add_received_date = received_date
        self.emlfile = inetmapi.IMToINet(self.store.server.mapisession, None, self.mapiobj, sopt)
    return self.emlfile
def eml(self, received_date=False, stored=True):
    """convert the object to a RFC 2822 mail

    :param received_date: add delivery date as received date
    :param stored: use the stored PR_EC_IMAP_EMAIL instead of calling
        inetmapi to convert
    """
    if not stored:
        return self._generate_eml(received_date)
    if self._eml is not None:
        return self._eml
    try:
        self._eml = _utils.stream(self.mapiobj, PR_EC_IMAP_EMAIL)
    except MAPIErrorNotFound:
        self._eml = self._generate_eml(received_date)
    return self._eml
def post(self, src_id):
    '''For streaming, start or end the streaming service.

    No payload is sent for this request.

    :param src_id: identifier of the previously-ingested source to stream
    :returns: the streaming response, or an (error message, 404) tuple
        when no source matches src_id
    '''
    client = db_client()
    col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
    try:
        src = find_source(col, src_id)
    except IndexError:
        return 'No resource at that URL.', 404
    filepath = src['rootdir']
    # NOTE(review): the original also opened FILTERS_COL_NAME and called
    # .find(), but never used the result — and created a second db_client
    # (f_client) that was never used either (f_col was built from `client`).
    # That dead/buggy code has been removed.
    return utils.stream(src['ingest_id'], filepath)
def check_stream():
    """Consume the tweet stream and queue tweet texts for repair workers.

    Spawns PROCESS_NUM FixMetadataNullsProcess workers sharing one
    JoinableQueue, then pushes the text of every decodable tweet whose
    author is not TWITTER_HANDLE.
    """
    queue = multiprocessing.JoinableQueue(10000)
    processes = []
    for i in range(PROCESS_NUM):
        # NOTE(review): `do_it` is not defined in this block — presumably a
        # module-level flag; verify against the rest of the file.
        process = FixMetadataNullsProcess(queue, i, do_it)
        process.start()
        processes.append(process)
    for i in stream():
        try:
            tweet = json.loads(i)
            if tweet.get('user', {}).get('screen_name', None) == TWITTER_HANDLE:
                # skip my own shit
                continue
            tweet_text = tweet.get('text', None)
            if tweet_text:
                queue.put(tweet_text)
        except Exception as e:  # fix: "except Exception, e" is Python 2-only syntax
            logger.error("unable to decode %r", i)
            logger.error("exception %r", e)
def data(self):
    """Binary attachment data, streamed once and then cached."""
    if self._data is not None:
        return self._data
    self._data = _utils.stream(self.mapiobj, PR_ATTACH_DATA_BIN)
    return self._data
def _dump(self, attachments=True, archiver=True, skip_broken=False, _main_item=None):
    """Serialize this item into a dict of raw property, recipient and attachment data.

    :param attachments: include ATTACH_BY_VALUE attachment data in the dump
    :param archiver: include archiver-namespace (PSETID_Archive) named properties
    :param skip_broken: on an unserializable attachment, log and continue
        instead of raising
    :param _main_item: top-level item used for log context when recursing
        into embedded messages (defaults to self)
    :returns: dict with b'props', b'recipients' and b'attachments' keys
    """
    _main_item = _main_item or self
    log = self.server.log
    # props: keep only the "best" body property when several body forms exist
    props = []
    tag_data = {}
    bestbody = _prop.bestbody(self.mapiobj)
    for prop in self.props():
        if (bestbody != PR_NULL and prop.proptag in (PR_BODY_W, PR_HTML, PR_RTF_COMPRESSED) and prop.proptag != bestbody):
            continue
        if prop.named: # named prop: prop.id_ system dependent...
            data = [prop.proptag, prop.mapiobj.Value, self.mapiobj.GetNamesFromIDs([prop.proptag], None, 0)[0]]
            if not archiver and data[2].guid == PSETID_Archive:
                continue
        else:
            data = [prop.proptag, prop.mapiobj.Value, None]
        props.append(data)
        tag_data[prop.proptag] = data
    self._convert_to_smtp(props, tag_data)
    # recipients: one property list per recipient row
    recs = []
    for row in self.table(PR_MESSAGE_RECIPIENTS):
        rprops = []
        tag_data = {}
        for prop in row:
            data = [prop.proptag, prop.mapiobj.Value, None]
            rprops.append(data)
            tag_data[prop.proptag] = data
        recs.append(rprops)
        self._convert_to_smtp(rprops, tag_data)
    # attachments
    atts = []
    # XXX optimize by looking at PR_MESSAGE_FLAGS?
    for row in self.table(PR_MESSAGE_ATTACHMENTS).dict_rows(): # XXX should we use GetAttachmentTable?
        try:
            num = row[PR_ATTACH_NUM]
            method = row.get(PR_ATTACH_METHOD, ATTACH_BY_VALUE)
            att = self.mapiobj.OpenAttach(num, IID_IAttachment, 0)
            if method == ATTACH_EMBEDDED_MSG:
                # embedded message: recurse into it via _dump
                try:
                    msg = att.OpenProperty(PR_ATTACH_DATA_OBJ, IID_IMessage, 0, MAPI_DEFERRED_ERRORS | MAPI_MODIFY)
                except MAPIErrorNoAccess:
                    # XXX the following may fail for embedded items in certain public stores, while
                    # the above does work (opening read-only doesn't work, but read-write works! wut!?)
                    msg = att.OpenProperty(PR_ATTACH_DATA_OBJ, IID_IMessage, 0, MAPI_DEFERRED_ERRORS)
                item = Item(mapiobj=msg)
                item.server = self.server # XXX
                data = item._dump(_main_item=_main_item) # recursion
                atts.append(([[a, b, None] for a, b in row.items()], data))
            elif method == ATTACH_BY_VALUE and attachments:
                try:
                    data = _utils.stream(att, PR_ATTACH_DATA_BIN)
                except MAPIErrorNotFound:
                    log.warn("no data found for attachment of item with entryid %s" % _main_item.entryid)
                    data = ''
                atts.append(([[a, b, None] for a, b in row.items()], data))
        except Exception as e: # XXX generalize so usable in more places
            log.error('could not serialize attachment for item with entryid %s' % _main_item.entryid)
            if skip_broken:
                log.error(traceback.format_exc())
                # NOTE(review): `service` is not defined in this block —
                # presumably a module global; verify against the rest of the file.
                if service and service.stats:
                    service.stats['errors'] += 1
            else:
                raise
    return {
        b'props': props,
        b'recipients': recs,
        b'attachments': atts,
    }
def prop(self, mapiobj, proptag, create=False, value=None, proptype=None): # XXX selfie
    """Look up a property on *mapiobj* by numeric tag, tag name, or
    'namespace:name' named-property syntax.

    :param mapiobj: MAPI object to read the property from
    :param proptag: int proptag, MAPI.Tags attribute name, or a
        'namespace:name' string for a named property
    :param create: create the property (via create_prop) when missing
    :param value: initial value when creating
    :param proptype: property type hint when creating / resolving names
    :raises NotFoundError: property absent and create is False
    """
    if _is_int(proptag) or \
        (_is_str(proptag) and ':' not in proptag):
        # search for property
        if _is_str(proptag):
            proptag = getattr(MAPI.Tags, proptag)
        try:
            sprop = HrGetOneProp(mapiobj, proptag)
        except MAPIErrorNotEnoughMemory:
            # value too large for HrGetOneProp: fall back to streaming
            data = _utils.stream(mapiobj, proptag)
            sprop = SPropValue(proptag, data)
        except MAPIErrorNotFound:
            # not found, create it?
            if create:
                return create_prop(self, mapiobj, proptag, value=value, proptype=proptype)
            else:
                raise NotFoundError('no such property: %s' % REV_TAG.get(proptag, hex(proptag)))
        return Property(mapiobj, sprop)
    else: # named property
        proptag2, proptype2, namespace, name = _name_to_proptag(proptag, mapiobj, proptype)
        # search for property
        if proptype2:
            try:
                sprop = HrGetOneProp(mapiobj, proptag2) # XXX merge two main branches?
                return Property(mapiobj, sprop)
            except MAPIErrorNotEnoughMemory:
                # value too large for HrGetOneProp: fall back to streaming
                data = _utils.stream(mapiobj, proptag2)
                sprop = SPropValue(proptag2, data)
                return Property(mapiobj, sprop)
            except MAPIErrorNotFound:
                pass
        else:
            # type unknown: scan all props in the namespace for a name match
            for prop in self.props(namespace=namespace): # XXX sloow, streaming? default pidlid type-db?
                if prop.name == name:
                    return prop
        # not found, create it?
        if create:
            return create_prop(self, mapiobj, proptag, value=value, proptype=proptype)
        else:
            raise NotFoundError('no such property: %s' % proptag)
# Script entry: start the stream as soon as the module is executed.
from utils import stream

stream()
def train_loop(buffer_size, lab_dim, num_epochs, warmup_epoch, model, optimiser, criterion, reduce_lr, model_path, checkpoint_every_n, lr, duration = True):
    """Train a TTS model (duration or acoustic variant) with per-epoch validation.

    :param buffer_size: unused in this function body — TODO confirm caller intent
    :param lab_dim: label feature dimension (acoustic mode)
    :param num_epochs: number of training epochs
    :param warmup_epoch: epoch after which the learning rate is halved each epoch
    :param model: torch model; must expose .parameters() and .save(path)
    :param optimiser: torch optimizer (rebuilt as Adam when lr is reduced)
    :param criterion: loss function (used on the duration path)
    :param reduce_lr: when True, halve lr each epoch (forced True after warmup)
    :param model_path: directory for latest_model.pyt checkpoints
    :param checkpoint_every_n: unused in this function body — TODO confirm
    :param lr: initial learning rate
    :param duration: True trains the duration model, False the acoustic model
    """
    device = next(model.parameters()).device
    min_validation_loss = 2.0      # NOTE(review): never used below
    validation_counter = 0         # NOTE(review): never used below
    batch_size = 2300              # NOTE(review): unused — DataLoader hard-codes 2300
    # This gets the valid set
    if not duration:
        print("\n Loading validation data ... \n")
        dataset = get_tts_dataset(cfg.valid_list,cfg.acoustic_bin_no_sil_norm, cfg.bin_acoustic_feats,None, lab_dim, cfg.cmp_dim, None)
    else:
        print("\n Loading validation data ... \n")
        dataset = get_tts_dataset(cfg.valid_list,cfg.bin_no_sil_norm, cfg.bin_acoustic_feats,cfg.dur_no_sil_norm, cfg.lab_dim, cfg.cmp_dim, cfg.dur_dim)
    valid_dataloader = DataLoader(dataset, batch_size=1,collate_fn=lambda batch : collate_tts(batch), shuffle=True, num_workers=1)
    start_train = time.time()
    for epoch in range(num_epochs):
        msg = f'Epoch {epoch+1}/{num_epochs} \n'
        #stream(msg)
        start_time = time.time()
        batch_losses = []          # NOTE(review): never appended to
        print("Loading training data ... \n")
        # This gets the training set (reloaded every epoch)
        if not duration:
            train_dataset = get_tts_dataset(cfg.train_list,cfg.acoustic_bin_no_sil_norm, cfg.bin_acoustic_feats,None, lab_dim, cfg.cmp_dim,None)
        else:
            # This gets the training set
            train_dataset = get_tts_dataset(cfg.train_list,cfg.bin_no_sil_norm, cfg.bin_acoustic_feats,cfg.dur_no_sil_norm, cfg.lab_dim, cfg.cmp_dim, cfg.dur_dim)
        train_dataloader = DataLoader(train_dataset, batch_size=2300,collate_fn=lambda batch : collate_tts(batch), shuffle=True, num_workers=1)
        for idx, (x, t,ids, d, frames, dur_len) in enumerate(train_dataloader):
            msg =f'\n Batch {idx+1}/{len(train_dataloader)}\n'
            stream(msg)
            model.train()
            n_frames = 256   # mini-chunk size within a loaded batch
            training_losses = []
            iters = int(len(x) / n_frames)
            print(iters)
            for i in range(iters):
                optimiser.zero_grad()
                start = i*n_frames
                end = (i+1)*n_frames
                if end > x.shape[0]:
                    end = x.shape[0]
                    print("end frames", i)
                x_in = x[start:end,:]
                if duration:
                    d_in = d[start:end,:]
                    lab, dur = x_in.to(device), d_in.to(device)
                else:
                    t_in = t[start:end,:]
                    lab, targ = x_in.to(device), t_in.to(device)
                #print(x_in.shape)
                #print(t.shape)
                # Forward pass
                y_pred = model(lab)
                #print(y_pred.shape)
                # Compute Loss
                if duration:
                    mse_loss = criterion(y_pred, dur)
                else:
                    #mse_loss = criterion(y_pred, targ)
                    #print("mse torch", mse_loss)
                    finetune = torch.mean(torch.sum((y_pred - targ)**2, dim=1))
                #loss = mse_loss
                # NOTE(review): when duration is True, `finetune` is never
                # assigned in this iteration, so `loss = finetune` raises
                # NameError — likely should be `loss = mse_loss` on that
                # path; confirm before running duration training.
                loss = finetune
                # Backward pass
                loss.backward()
                optimiser.step()
                training_losses.append(np_now(finetune))
                end_time = time.time()
                #print("iter time", end_time-start_time)
            print(training_losses)
            this_batch_loss = np.mean(training_losses)
        if epoch > warmup_epoch:
            reduce_lr = True
        if reduce_lr:
            # halve lr and rebuild the optimiser (discards Adam state)
            lr = lr * 0.5
            optimiser = optim.Adam(model.parameters(), lr)
        validation_losses = []
        with torch.no_grad():
            print("Validating ...")
            total = 0
            for idx, (x, t,ids, d, frames, dur_len) in enumerate(valid_dataloader):
                if duration:
                    val_in_x, val_d = x.to(device), d.to(device)
                else:
                    val_in_x, val_t = x.to(device), t.to(device)
                model.eval()
                # Forward pass
                val_prediction = model(val_in_x)
                # Compute Loss
                if duration:
                    mse_loss = criterion(val_prediction, val_d)
                else:
                    #mse_loss = criterion(val_prediction, val_t)
                    # NOTE(review): same duration-path issue as the training
                    # loop — `finetune` is only set on the acoustic branch.
                    finetune = torch.mean(torch.sum((val_prediction - val_t)**2, dim=1))
                val_loss = finetune
                validation_losses.append(np_now(finetune))
        checkpoint = f'{model_path}/latest_model.pyt'
        model.save(checkpoint)
        #print("validation losses",len(validation_losses))
        this_validation_loss = np.mean(validation_losses)
        msg = f'\nEpoch {epoch}: mean val loss: {this_validation_loss} \n'
        stream(msg)
        end_time = time.time()
        epoch_time = end_time - start_time
        msg = f'\nEpoch {epoch}: train loss: {this_batch_loss} time: {epoch_time}\n'
        stream(msg)
        #optimiser = exp_lr_scheduler(optimiser, epoch)
    end_train = time.time()
    total_time = end_train - start_train
    msg = f'Total training time: {total_time}'
    stream(msg)
    # NOTE(review): `checkpoint` is only bound inside the epoch loop, so
    # this raises NameError when num_epochs == 0 — confirm intended.
    model.save(checkpoint)
def video_feed():
    """HTTP endpoint: return frames from stream() as a multipart-replace response."""
    mjpeg_mimetype = "multipart/x-mixed-replace; boundary=frame"
    return Response(stream(), mimetype=mjpeg_mimetype)