Example #1
    def html(self):
        """ HTML representation """

        try:
            return _utils.stream(self._arch_item, PR_HTML)
        except MAPIErrorNotFound:
            return b''
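This and most of the following examples lean on the same `_utils.stream` helper from the Kopano Python bindings. A minimal sketch of what such a helper might look like, assuming the `OpenProperty`/`IStream` API from the python-mapi bindings (the import path, block size, and the UTF-32LE decode are assumptions for illustration):

from MAPI.Util import *  # assumed: provides IID_IStream, PROP_TYPE, PT_UNICODE

def stream(mapiobj, proptag):
    # Open the property as a raw IStream and read it out in blocks;
    # this also works for properties too large for HrGetOneProp.
    istream = mapiobj.OpenProperty(proptag, IID_IStream, 0, 0)
    block_size = 0x100000  # assumed block size
    blocks = []
    while True:
        block = istream.Read(block_size)
        blocks.append(block)
        if len(block) < block_size:
            break
    data = b''.join(blocks)
    if PROP_TYPE(proptag) == PT_UNICODE:
        # wide-string properties come back as raw bytes; Example #4's comment
        # hints the encoding may differ per platform (UTF-16LE on Windows)
        data = data.decode('utf-32-le')
    return data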
Example #2
    def rtf(self):
        """ RTF representation """

        try:
            return _utils.stream(self._arch_item, PR_RTF_COMPRESSED)
        except MAPIErrorNotFound:
            return b''
Example #3
def check_stream():
    queue = multiprocessing.JoinableQueue(10000)

    processes = []
    for i in range(PROCESS_NUM):
        process = SingTweetProcess(queue, i)
        process.start()
        processes.append(process)

    for i in stream():
        logger.debug('Got new data:\n%r', i)
        try:
            tweet = json.loads(i)
            username = tweet.get('user', {}).get('screen_name', None)
            if username is None:
                continue
            elif username.lower() == TWITTER_HANDLE:
                # skip tweets from our own account
                continue
            else:
                tweet_text = tweet.get('text', None)
                if tweet_text:
                    tweet_id = tweet.get('id_str', str(random.random() * 10000))
                    data = (tweet_text, tweet_id, username)
                    logger.info('queueing %r', data)
                    queue.put(data)

        except Exception as e:
            logger.error("unable to decode %r", i)
            logger.error("exception %r", e)
Example #4
    def text(self):
        """ Plain text representation """

        try:
            return _utils.stream(self._arch_item, PR_BODY_W) # on Windows this may be UTF-16LE?
        except MAPIErrorNotFound:
            return u''
Example #5
 def Value(self):
     if self._Value is None:
         try:
             self._Value = _utils.stream(self.mapiobj, self.ulPropTag)
         except MAPIErrorNotFound:  # XXX eg normalized subject streaming broken..?
             self._Value = None
     return self._Value
Example #6
 def eml(self, received_date=False):
     """ Return .eml version of item """
     if self.emlfile is None:
         try:
             self.emlfile = _utils.stream(self.mapiobj, PR_EC_IMAP_EMAIL)
         except MAPIErrorNotFound:
             sopt = inetmapi.sending_options()
             sopt.no_recipients_workaround = True
             sopt.add_received_date = received_date
             self.emlfile = inetmapi.IMToINet(self.store.server.mapisession,
                                              None, self.mapiobj, sopt)
     return self.emlfile
Example #7
    def eml(self, received_date=False, stored=True):
        """ convert the object to a RFC 2822 mail

        :param received_date: add delivery date as received date
        :param stored: use the stored PR_EC_IMAP_EMAIL instead of calling inetmapi to convert
        """
        if not stored:
            return self._generate_eml(received_date)

        if self._eml is None:
            try:
                self._eml = _utils.stream(self.mapiobj, PR_EC_IMAP_EMAIL)
            except MAPIErrorNotFound:
                self._eml = self._generate_eml(received_date)
        return self._eml
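Call sites then just choose between the cached stored copy and a fresh conversion; a short illustration, assuming `item` is an instance of the class above:

raw = item.eml()                                    # prefers stored PR_EC_IMAP_EMAIL, cached
fresh = item.eml(received_date=True, stored=False)  # always converts via inetmapi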
Example #8
            def post(self, src_id):
                '''
                For streaming, start or end the streaming service.
                No payload is sent for this request.
                '''
                client = db_client()
                col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
                try:
                    src = find_source(col, src_id)

                except IndexError:
                    return 'No resource at that URL.', 404

                filepath = src['rootdir']

                # get filters
                f_client = db_client()
                f_col = db_collection(f_client, DATALOADER_DB_NAME, FILTERS_COL_NAME)
                filters = f_col.find()
                return utils.stream(src['ingest_id'], filepath)
Example #9
def check_stream():
    queue = multiprocessing.JoinableQueue(10000)

    processes = []
    for i in range(PROCESS_NUM):
        process = FixMetadataNullsProcess(queue, i, do_it)
        process.start()
        processes.append(process)

    for i in stream():
        try:
            tweet = json.loads(i)
            if tweet.get('user', {}).get('screen_name', None) == TWITTER_HANDLE:
                # skip tweets from our own account
                continue
            tweet_text = tweet.get('text', None)
            if tweet_text:
                queue.put(tweet_text)

        except Exception as e:
            logger.error("unable to decode %r", i)
            logger.error("exception %r", e)
Example #10
 def data(self):
     """Binary data"""
     if self._data is None:
         self._data = _utils.stream(self.mapiobj, PR_ATTACH_DATA_BIN)
     return self._data
Example #11
    def _dump(self, attachments=True, archiver=True, skip_broken=False, _main_item=None):
        _main_item = _main_item or self
        log = self.server.log

        # props
        props = []
        tag_data = {}
        bestbody = _prop.bestbody(self.mapiobj)
        for prop in self.props():
            if (bestbody != PR_NULL and prop.proptag in (PR_BODY_W, PR_HTML, PR_RTF_COMPRESSED) and prop.proptag != bestbody):
                continue
            if prop.named: # named prop: prop.id_ system dependent...
                data = [prop.proptag, prop.mapiobj.Value, self.mapiobj.GetNamesFromIDs([prop.proptag], None, 0)[0]]
                if not archiver and data[2].guid == PSETID_Archive:
                    continue
            else:
                data = [prop.proptag, prop.mapiobj.Value, None]
            props.append(data)
            tag_data[prop.proptag] = data
        self._convert_to_smtp(props, tag_data)

        # recipients
        recs = []
        for row in self.table(PR_MESSAGE_RECIPIENTS):
            rprops = []
            tag_data = {}
            for prop in row:
                data = [prop.proptag, prop.mapiobj.Value, None]
                rprops.append(data)
                tag_data[prop.proptag] = data
            recs.append(rprops)
            self._convert_to_smtp(rprops, tag_data)

        # attachments
        atts = []
        # XXX optimize by looking at PR_MESSAGE_FLAGS?
        for row in self.table(PR_MESSAGE_ATTACHMENTS).dict_rows(): # XXX should we use GetAttachmentTable?
            try:
                num = row[PR_ATTACH_NUM]
                method = row.get(PR_ATTACH_METHOD, ATTACH_BY_VALUE)
                att = self.mapiobj.OpenAttach(num, IID_IAttachment, 0)
                if method == ATTACH_EMBEDDED_MSG:
                    try:
                        msg = att.OpenProperty(PR_ATTACH_DATA_OBJ, IID_IMessage, 0, MAPI_DEFERRED_ERRORS | MAPI_MODIFY)
                    except MAPIErrorNoAccess:
                        # XXX the following may fail for embedded items in certain public stores, while
                        # the above does work (opening read-only doesn't work, but read-write works! wut!?)
                        msg = att.OpenProperty(PR_ATTACH_DATA_OBJ, IID_IMessage, 0, MAPI_DEFERRED_ERRORS)
                    item = Item(mapiobj=msg)
                    item.server = self.server # XXX
                    data = item._dump(_main_item=_main_item) # recursion
                    atts.append(([[a, b, None] for a, b in row.items()], data))
                elif method == ATTACH_BY_VALUE and attachments:
                    try:
                        data = _utils.stream(att, PR_ATTACH_DATA_BIN)
                    except MAPIErrorNotFound:
                        log.warn("no data found for attachment of item with entryid %s" % _main_item.entryid)
                        data = b''  # keep bytes, consistent with streamed attachment data
                    atts.append(([[a, b, None] for a, b in row.items()], data))
            except Exception as e: # XXX generalize so usable in more places
                log.error('could not serialize attachment for item with entryid %s' % _main_item.entryid)
                if skip_broken:
                    log.error(traceback.format_exc())
                    if service and service.stats:
                        service.stats['errors'] += 1
                else:
                    raise

        return {
            b'props': props,
            b'recipients': recs,
            b'attachments': atts,
        }
Example #12
def prop(self,
         mapiobj,
         proptag,
         create=False,
         value=None,
         proptype=None):  # XXX selfie
    if _is_int(proptag) or \
       (_is_str(proptag) and ':' not in proptag):
        # search for property
        if _is_str(proptag):
            proptag = getattr(MAPI.Tags, proptag)
        try:
            sprop = HrGetOneProp(mapiobj, proptag)
        except MAPIErrorNotEnoughMemory:
            data = _utils.stream(mapiobj, proptag)
            sprop = SPropValue(proptag, data)
        except MAPIErrorNotFound:
            # not found, create it?
            if create:
                return create_prop(self,
                                   mapiobj,
                                   proptag,
                                   value=value,
                                   proptype=proptype)
            else:
                raise NotFoundError('no such property: %s' %
                                    REV_TAG.get(proptag, hex(proptag)))
        return Property(mapiobj, sprop)

    else:  # named property
        proptag2, proptype2, namespace, name = _name_to_proptag(
            proptag, mapiobj, proptype)

        # search for property
        if proptype2:
            try:
                sprop = HrGetOneProp(mapiobj,
                                     proptag2)  # XXX merge two main branches?
                return Property(mapiobj, sprop)
            except MAPIErrorNotEnoughMemory:
                data = _utils.stream(mapiobj, proptag2)
                sprop = SPropValue(proptag2, data)
                return Property(mapiobj, sprop)
            except MAPIErrorNotFound:
                pass
        else:
            for prop in self.props(
                    namespace=namespace
            ):  # XXX sloow, streaming? default pidlid type-db?
                if prop.name == name:
                    return prop

        # not found, create it?
        if create:
            return create_prop(self,
                               mapiobj,
                               proptag,
                               value=value,
                               proptype=proptype)
        else:
            raise NotFoundError('no such property: %s' % proptag)
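The pattern worth lifting from Example #12: `HrGetOneProp` raises `MAPIErrorNotEnoughMemory` once a property value exceeds the inline size limit, so the code falls back to streaming. Stripped of the named-property handling, the fallback reduces to something like this (a sketch using the same calls as the example):

def get_prop_value(mapiobj, proptag):
    try:
        return HrGetOneProp(mapiobj, proptag).Value
    except MAPIErrorNotEnoughMemory:
        # value too large to fetch inline; stream it instead
        return _utils.stream(mapiobj, proptag)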
Example #13
from utils import stream
stream()
Example #14
def train_loop(buffer_size, lab_dim, num_epochs, warmup_epoch, model, optimiser, criterion, reduce_lr, model_path, checkpoint_every_n, lr, duration=True):
    device = next(model.parameters()).device

    min_validation_loss = 2.0
    validation_counter = 0
    batch_size = 2300
    # Load the validation set (acoustic or duration features)
    print("\n Loading validation data ... \n")
    if not duration:
        dataset = get_tts_dataset(cfg.valid_list, cfg.acoustic_bin_no_sil_norm, cfg.bin_acoustic_feats,
                                  None, lab_dim, cfg.cmp_dim, None)
    else:
        dataset = get_tts_dataset(cfg.valid_list, cfg.bin_no_sil_norm, cfg.bin_acoustic_feats,
                                  cfg.dur_no_sil_norm, cfg.lab_dim, cfg.cmp_dim, cfg.dur_dim)

    valid_dataloader = DataLoader(dataset, batch_size=1, collate_fn=collate_tts,
                                  shuffle=True, num_workers=1)

    start_train = time.time()
    for epoch in range(num_epochs):

        msg = f'Epoch {epoch+1}/{num_epochs} \n'
        #stream(msg)

        start_time = time.time()
        batch_losses = []

        print("Loading training data ... \n")

        # This gets the training set
        if not duration:
            train_dataset = get_tts_dataset(cfg.train_list,cfg.acoustic_bin_no_sil_norm, cfg.bin_acoustic_feats,None, lab_dim, cfg.cmp_dim,None)
        else:
            # This gets the training set
            train_dataset = get_tts_dataset(cfg.train_list,cfg.bin_no_sil_norm, cfg.bin_acoustic_feats,cfg.dur_no_sil_norm, cfg.lab_dim, cfg.cmp_dim, cfg.dur_dim)


        train_dataloader = DataLoader(train_dataset, batch_size=2300,collate_fn=lambda batch : collate_tts(batch),
                            shuffle=True, num_workers=1)



        for idx, (x, t, ids, d, frames, dur_len) in enumerate(train_dataloader):

            msg = f'\n Batch {idx+1}/{len(train_dataloader)}\n'
            stream(msg)

            model.train()

            n_frames = 256
            training_losses = []
            iters = int(len(x) / n_frames)
            print(iters)

            for i in range(iters):

                optimiser.zero_grad()
                start = i * n_frames
                end = (i + 1) * n_frames

                # Clamp the final slice to the available frames
                if end > x.shape[0]:
                    end = x.shape[0]
                    print("end frames", i)

                x_in = x[start:end, :]

                if duration:
                    d_in = d[start:end, :]
                    lab, dur = x_in.to(device), d_in.to(device)
                else:
                    t_in = t[start:end, :]
                    lab, targ = x_in.to(device), t_in.to(device)

                # Forward pass
                y_pred = model(lab)
                # Compute the loss: criterion (MSE) for the duration model,
                # per-frame summed squared error for the acoustic model
                if duration:
                    loss = criterion(y_pred, dur)
                else:
                    loss = torch.mean(torch.sum((y_pred - targ) ** 2, dim=1))

                # Backward pass
                loss.backward()
                optimiser.step()

                training_losses.append(np_now(loss))

                end_time = time.time()
                #print("iter time", end_time-start_time)

            print(training_losses)
            this_batch_loss = np.mean(training_losses)

        if epoch > warmup_epoch:
            reduce_lr = True

        if reduce_lr:
            lr = lr * 0.5
            optimiser = optim.Adam(model.parameters(), lr)

        validation_losses = []
        with torch.no_grad():
            print("Validating ...")

            total = 0
            for idx, (x, t, ids, d, frames, dur_len) in enumerate(valid_dataloader):

                if duration:
                    val_in_x, val_d = x.to(device), d.to(device)
                else:
                    val_in_x, val_t = x.to(device), t.to(device)

                model.eval()
                # Forward pass
                val_prediction = model(val_in_x)

                # Compute the validation loss, mirroring the training objective
                if duration:
                    val_loss = criterion(val_prediction, val_d)
                else:
                    val_loss = torch.mean(torch.sum((val_prediction - val_t) ** 2, dim=1))

                validation_losses.append(np_now(val_loss))


        checkpoint = f'{model_path}/latest_model.pyt'
        model.save(checkpoint)
        #print("validation losses",len(validation_losses))
        this_validation_loss = np.mean(validation_losses)
        msg = f'\nEpoch {epoch}: mean val loss: {this_validation_loss} \n'
        stream(msg)

        end_time = time.time()
        epoch_time = end_time - start_time
        msg = f'\nEpoch {epoch}: train loss: {this_batch_loss} time: {epoch_time}\n'
        stream(msg)
        #optimiser = exp_lr_scheduler(optimiser, epoch)

    end_train = time.time()
    total_time = end_train - start_train
    msg = f'Total training time: {total_time}'
    stream(msg)
    model.save(checkpoint)
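Note that `stream` in this example is a console progress printer, not the MAPI or HTTP helper of the other examples. A plausible minimal version, assuming it simply writes and flushes so progress messages appear immediately:

import sys

def stream(message):
    # write without adding a newline so callers control line breaks
    sys.stdout.write(message)
    sys.stdout.flush()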
Example #15
def video_feed():
    return Response(stream(), mimetype="multipart/x-mixed-replace; boundary=frame")
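Example #15 assumes `stream()` is a generator yielding multipart MJPEG chunks for Flask's `Response`. A minimal sketch of such a generator, assuming OpenCV (`cv2`) as the frame source (the camera index and JPEG encoding are illustrative choices):

import cv2

def stream():
    camera = cv2.VideoCapture(0)  # assumed: default webcam
    try:
        while True:
            ok, frame = camera.read()
            if not ok:
                break
            ok, jpeg = cv2.imencode('.jpg', frame)
            if not ok:
                continue
            # one part per frame of the multipart/x-mixed-replace response
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + jpeg.tobytes() + b'\r\n')
    finally:
        camera.release()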